move to storage/innobase

author: Sergei Golubchik <vuvova@gmail.com> 2015-05-04 19:17:21 +0200
committer: Sergei Golubchik <vuvova@gmail.com> 2015-05-04 19:17:21 +0200
commit: 6d06fbbd1dc25b3c12568f9038060dfdb69f9683 (patch)
tree: 21e27f3fddc89f9dda6b337091464ba10c490123 /storage/innobase/include
parent: 1645930d0bd02f79df3ebff412b90acdc15bd9a0 (diff)
download: mariadb-git-6d06fbbd1dc25b3c12568f9038060dfdb69f9683.tar.gz
227 files changed, 72797 insertions, 0 deletions
diff --git a/storage/innobase/include/api0api.h b/storage/innobase/include/api0api.h
new file mode 100644
index 00000000000..d77d691becc
--- /dev/null
+++ b/storage/innobase/include/api0api.h
@@ -0,0 +1,1304 @@
+/*****************************************************************************
+
+Copyright (c) 2011, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/api0api.h
+InnoDB Native API
+
+2008-08-01 Created by Sunny Bains.
+3/20/2011 Jimmy Yang extracted from Embedded InnoDB
+*******************************************************/
+
+#ifndef api0api_h
+#define api0api_h
+
+#include "db0err.h"
+#include <stdio.h>
+
+#ifdef _MSC_VER
+#define strncasecmp		_strnicmp
+#define strcasecmp		_stricmp
+#endif
+
+#if defined(__GNUC__) && (__GNUC__ > 2) && ! defined(__INTEL_COMPILER)
+#define UNIV_NO_IGNORE		__attribute__ ((warn_unused_result))
+#else
+#define UNIV_NO_IGNORE
+#endif /* __GNUC__ && __GNUC__ > 2 && !__INTEL_COMPILER */
+
+/* ib_bool_t is an unsigned long int typedef, hence the UL suffix below. */
+/** The boolean value of "true" used internally within InnoDB */
+#define IB_TRUE			0x1UL
+/** The boolean value of "false" used internally within InnoDB */
+#define IB_FALSE		0x0UL
+
+/* Basic types used by the InnoDB API. */
+/** All InnoDB error codes are represented by ib_err_t */
+typedef enum dberr_t		ib_err_t;
+/** Representation of a byte within InnoDB */
+typedef unsigned char		ib_byte_t;
+/** Representation of an unsigned long int within InnoDB */
+typedef unsigned long int	ib_ulint_t;
+
+/* We assume C99 support except when using VisualStudio. */
+#if !defined(_MSC_VER)
+#include <stdint.h>
+#endif /* _MSC_VER */
+
+/* Integer types used by the API. Microsoft VS defines its own types
+and we use the Microsoft types when building with Visual Studio. */
+#if defined(_MSC_VER)
+/** A signed 8 bit integral type. */
+typedef __int8			ib_i8_t;
+#else
+/** A signed 8 bit integral type. */
+typedef int8_t                  ib_i8_t;
+#endif
+
+#if defined(_MSC_VER)
+/** An unsigned 8 bit integral type. */
+typedef unsigned __int8		ib_u8_t;
+#else
+/** An unsigned 8 bit integral type. */
+typedef uint8_t                 ib_u8_t;
+#endif
+
+#if defined(_MSC_VER)
+/** A signed 16 bit integral type. */
+typedef __int16			ib_i16_t;
+#else
+/** A signed 16 bit integral type. */
+typedef int16_t                 ib_i16_t;
+#endif
+
+#if defined(_MSC_VER)
+/** An unsigned 16 bit integral type. */
+typedef unsigned __int16	ib_u16_t;
+#else
+/** An unsigned 16 bit integral type. */
+typedef uint16_t                ib_u16_t;
+#endif
+
+#if defined(_MSC_VER)
+/** A signed 32 bit integral type. */
+typedef __int32			ib_i32_t;
+#else
+/** A signed 32 bit integral type. */
+typedef int32_t                 ib_i32_t;
+#endif
+
+#if defined(_MSC_VER)
+/** An unsigned 32 bit integral type. */
+typedef unsigned __int32	ib_u32_t;
+#else
+/** An unsigned 32 bit integral type. */
+typedef uint32_t                ib_u32_t;
+#endif
+
+#if defined(_MSC_VER)
+/** A signed 64 bit integral type. */
+typedef __int64			ib_i64_t;
+#else
+/** A signed 64 bit integral type. */
+typedef int64_t                 ib_i64_t;
+#endif
+
+#if defined(_MSC_VER)
+/** An unsigned 64 bit integral type. */
+typedef unsigned __int64	ib_u64_t;
+#else
+/** An unsigned 64 bit integral type. */
+typedef uint64_t                ib_u64_t;
+#endif
+
+typedef void*			ib_opaque_t;
+typedef ib_opaque_t		ib_charset_t;
+typedef ib_ulint_t		ib_bool_t;
+typedef ib_u64_t		ib_id_u64_t;
+
+/** @enum ib_cfg_type_t Possible types for a configuration variable. */
+typedef enum {
+	IB_CFG_IBOOL,			/*!< The configuration parameter is
+					of type ibool */
+
+	/* XXX Can we avoid having different types for ulint and ulong?
+	- On Win64 "unsigned long" is 32 bits
+	- ulong is always defined as "unsigned long"
+	- On Win64 ulint is defined as 64 bit integer
+	=> On Win64 ulint != ulong.
+	If we typecast all ulong and ulint variables to the smaller type
+	ulong, then we will cut the range of the ulint variables.
+	This is not a problem for most ulint variables because their max
+	allowed values do not exceed 2^32-1 (e.g. log_groups is ulint
+	but its max allowed value is 10). BUT buffer_pool_size and
+	log_file_size allow up to 2^64-1. */
+
+	IB_CFG_ULINT,			/*!< The configuration parameter is
+					of type ulint */
+
+	IB_CFG_ULONG,			/*!< The configuration parameter is
+					of type ulong */
+
+	IB_CFG_TEXT,			/*!< The configuration parameter is
+					of type char* */
+
+	IB_CFG_CB			/*!< The configuration parameter is
+					a callback parameter */
+} ib_cfg_type_t;
+
+/** @enum ib_col_type_t  column types that are supported. */
+typedef enum {
+	IB_VARCHAR =	1,		/*!< Character varying length. The
+					column is not padded. */
+
+	IB_CHAR =	2,		/*!< Fixed length character string. The
+					column is padded to the right. */
+
+	IB_BINARY =	3,		/*!< Fixed length binary, similar to
+					IB_CHAR but the column is not padded
+					to the right. */
+
+	IB_VARBINARY =	4,		/*!< Variable length binary */
+
+	IB_BLOB	=	5,		/*!< Binary large object, or
+					a TEXT type */
+
+	IB_INT =	6,		/*!< Integer: can be any size
+					from 1 - 8 bytes. If the size is
+					1, 2, 4 and 8 bytes then you can use
+					the typed read and write functions. For
+					other sizes you will need to use the
+					ib_col_get_value() function and do the
+					conversion yourself. */
+
+	IB_SYS =	8,		/*!< System column, this column can
+					be one of DATA_TRX_ID, DATA_ROLL_PTR
+					or DATA_ROW_ID. */
+
+	IB_FLOAT =	9,		/*!< C (float)  floating point value. */
+
+	IB_DOUBLE =	10,		/*!< C (double) floating point value. */
+
+	IB_DECIMAL =	11,		/*!< Decimal stored as an ASCII
+					string */
+
+	IB_VARCHAR_ANYCHARSET =	12,	/*!< Any charset, varying length */
+
+	IB_CHAR_ANYCHARSET =	13	/*!< Any charset, fixed length */
+
+} ib_col_type_t;
+
+/** @enum ib_tbl_fmt_t InnoDB table format types */
+typedef enum {
+	IB_TBL_REDUNDANT,		/*!< Redundant row format, the column
+					type and length is stored in the row.*/
+
+	IB_TBL_COMPACT,			/*!< Compact row format, the column
+					type is not stored in the row. The
+					length is stored in the row but the
+					storage format uses a compact format
+					to store the length of the column data
+					and record data storage format also
+					uses less storage. */
+
+	IB_TBL_DYNAMIC,			/*!< Compact row format. BLOB prefixes
+					are not stored in the clustered index */
+
+	IB_TBL_COMPRESSED		/*!< Similar to dynamic format but
+					with pages compressed */
+} ib_tbl_fmt_t;
+
+/** @enum ib_col_attr_t InnoDB column attributes */
+typedef enum {
+	IB_COL_NONE = 0,		/*!< No special attributes. */
+
+	IB_COL_NOT_NULL = 1,		/*!< Column data can't be NULL. */
+
+	IB_COL_UNSIGNED = 2,		/*!< Column is IB_INT and unsigned. */
+
+	IB_COL_NOT_USED = 4,		/*!< Future use, reserved. */
+
+	IB_COL_CUSTOM1 = 8,		/*!< Custom precision type, this is
+					a bit that is ignored by InnoDB and so
+					can be set and queried by users. */
+
+	IB_COL_CUSTOM2 = 16,		/*!< Custom precision type, this is
+					a bit that is ignored by InnoDB and so
+					can be set and queried by users. */
+
+	IB_COL_CUSTOM3 = 32		/*!< Custom precision type, this is
+					a bit that is ignored by InnoDB and so
+					can be set and queried by users. */
+} ib_col_attr_t;
+
+/* Note: must match lock0types.h */
+/** @enum ib_lck_mode_t InnoDB lock modes. */
+typedef enum {
+	IB_LOCK_IS = 0,			/*!< Intention shared, an intention
+					lock should be used to lock tables */
+
+	IB_LOCK_IX,			/*!< Intention exclusive, an intention
+					lock should be used to lock tables */
+
+	IB_LOCK_S,			/*!< Shared locks should be used to
+					lock rows */
+
+	IB_LOCK_X,			/*!< Exclusive locks should be used to
+					lock rows*/
+
+	IB_LOCK_TABLE_X,		/*!< exclusive table lock */
+
+	IB_LOCK_NONE,			/*!< This is used internally to note
+					consistent read */
+
+	IB_LOCK_NUM = IB_LOCK_NONE	/*!< number of lock modes */
+} ib_lck_mode_t;
+
+typedef enum {
+	IB_CLUSTERED = 1,	/*!< clustered index */
+	IB_UNIQUE = 2		/*!< unique index */
+} ib_index_type_t;
+
+/** @enum ib_srch_mode_t InnoDB cursor search modes for ib_cursor_moveto().
+Note: Values must match those found in page0cur.h */
+typedef enum {
+	IB_CUR_G = 1,			/*!< If search key is not found then
+					position the cursor on the row that
+					is greater than the search key */
+
+	IB_CUR_GE = 2,			/*!< If the search key not found then
+					position the cursor on the row that
+					is greater than or equal to the search
+					key */
+
+	IB_CUR_L = 3,			/*!< If search key is not found then
+					position the cursor on the row that
+					is less than the search key */
+
+	IB_CUR_LE = 4			/*!< If search key is not found then
+					position the cursor on the row that
+					is less than or equal to the search
+					key */
+} ib_srch_mode_t;
+
+/** @enum ib_match_mode_t Various match modes used by ib_cursor_moveto() */
+typedef enum {
+	IB_CLOSEST_MATCH,		/*!< Closest match possible */
+
+	IB_EXACT_MATCH,			/*!< Search using a complete key
+					value */
+
+	IB_EXACT_PREFIX			/*!< Search using a key prefix which
+					must match to rows: the prefix may
+					contain an incomplete field (the
+					last field in prefix may be just
+					a prefix of a fixed length column) */
+} ib_match_mode_t;
+
+/** @struct ib_col_meta_t InnoDB column meta data. */
+typedef struct {
+	ib_col_type_t	type;		/*!< Type of the column */
+
+	ib_col_attr_t	attr;		/*!< Column attributes */
+
+	ib_u32_t	type_len;	/*!< Length of type */
+
+	ib_u16_t	client_type;	/*!< 16 bits of data relevant only to
+					the client. InnoDB doesn't care */
+
+	ib_charset_t*	charset;	/*!< Column charset */
+} ib_col_meta_t;
+
+/* Note: Must be in sync with trx0trx.h */
+/** @enum ib_trx_state_t The transaction state can be queried using the
+ib_trx_state() function. The InnoDB deadlock monitor can roll back a
+transaction and users should be prepared for this, especially where there
+is high contention. The way to determine the state of the transaction is to
+query its state and check. */
+typedef enum {
+	IB_TRX_NOT_STARTED,		/*!< Has not started yet, the
+					transaction has not been started yet.*/
+
+	IB_TRX_ACTIVE,			/*!< The transaction is currently
+					active and needs to be either
+					committed or rolled back. */
+
+	IB_TRX_COMMITTED_IN_MEMORY,	/*!< Not committed to disk yet */
+
+	IB_TRX_PREPARED			/*!< Support for 2PC/XA */
+} ib_trx_state_t;
+
+/* Note: Must be in sync with trx0trx.h */
+/** @enum ib_trx_level_t Transaction isolation levels */
+typedef enum {
+	IB_TRX_READ_UNCOMMITTED = 0,	/*!< Dirty read: non-locking SELECTs are
+					performed so that we do not look at a
+					possible earlier version of a record;
+					thus they are not 'consistent' reads
+					under this isolation level; otherwise
+					like level 2 */
+
+	IB_TRX_READ_COMMITTED = 1,	/*!< Somewhat Oracle-like isolation,
+					except that in range UPDATE and DELETE
+					we must block phantom rows with
+					next-key locks; SELECT ... FOR UPDATE
+					and ...  LOCK IN SHARE MODE only lock
+					the index records, NOT the gaps before
+					them, and thus allow free inserting;
+					each consistent read reads its own
+					snapshot */
+
+	IB_TRX_REPEATABLE_READ = 2,	/*!< All consistent reads in the same
+					trx read the same snapshot; full
+					next-key locking used in locking reads
+					to block insertions into gaps */
+
+	IB_TRX_SERIALIZABLE = 3		/*!< All plain SELECTs are converted to
+					LOCK IN SHARE MODE reads */
+} ib_trx_level_t;
+
+/** Generic InnoDB callback prototype. */
+typedef void (*ib_cb_t)(void);
+
+#define IB_CFG_BINLOG_ENABLED	0x1
+#define IB_CFG_MDL_ENABLED	0x2
+#define IB_CFG_DISABLE_ROWLOCK	0x4
+
+/** The first argument to the InnoDB message logging function. By default
+it's set to stderr. You should treat ib_msg_stream_t as a void*, since
+it will probably change in the future. */
+typedef FILE* ib_msg_stream_t;
+
+/** All log messages are written to this function. It should have the same
+behavior as fprintf(3). */
+typedef int (*ib_msg_log_t)(ib_msg_stream_t, const char*, ...);
+
+/* Note: This is to make it easy for API users to have type
+checking for arguments to our functions. Making it ib_opaque_t
+by itself will result in pointer decay resulting in subverting
+of the compiler's type checking. */
+
+/** InnoDB tuple handle. This handle can refer to either a cluster index
+tuple or a secondary index tuple. There are two types of tuples for each
+type of index, making a total of four types of tuple handles. There
+is a tuple for reading the entire row contents and another for searching
+on the index key. */
+typedef struct ib_tuple_t* ib_tpl_t;
+
+/** InnoDB transaction handle, all database operations need to be covered
+by transactions. This handle represents a transaction. The handle can be
+created with ib_trx_begin(), you commit your changes with ib_trx_commit()
+and undo your changes using ib_trx_rollback(). If the InnoDB deadlock
+monitor rolls back the transaction then you need to free the transaction
+using the function ib_trx_release(). You can query the state of an InnoDB
+transaction by calling ib_trx_state(). */
+typedef struct trx_t* ib_trx_t;
+
+/** InnoDB cursor handle */
+typedef struct ib_cursor_t* ib_crsr_t;
+
+/*************************************************************//**
+This function is used to compare two data fields for which the data type
+is such that we must use the client code to compare them.
+
+@param col_meta		column meta data
+@param p1		key
+@param p1_len		key length
+@param p2		second key
+@param p2_len		second key length
+@return 1, 0, -1, if p1 is greater, equal, less than p2, respectively */
+
+typedef int (*ib_client_cmp_t)(
+	const ib_col_meta_t*	col_meta,
+	const ib_byte_t*	p1,
+	ib_ulint_t		p1_len,
+	const ib_byte_t*	p2,
+	ib_ulint_t		p2_len);
+
+/* This should be the same as univ.i */
+/** Represents SQL_NULL length */
+#define	IB_SQL_NULL		0xFFFFFFFF
+/** The number of system columns in a row. */
+#define IB_N_SYS_COLS		3
+
+/** The maximum length of a text column. */
+#define MAX_TEXT_LEN		4096
+
+/* MySQL uses 3 byte UTF-8 encoding. */
+/** The maximum length of a column name in a table schema. */
+#define IB_MAX_COL_NAME_LEN	(64 * 3)
+
+/** The maximum length of a table name (plus database name). */
+#define IB_MAX_TABLE_NAME_LEN	((64 * 3) * 2)
+
+/*****************************************************************//**
+Start a transaction that's been rolled back. This special function
+exists for the case when InnoDB's deadlock detector has rolled back
+a transaction. While the transaction has been rolled back the handle
+is still valid and can be reused by calling this function. If you
+don't want to reuse the transaction handle then you can free the handle
+by calling ib_trx_release().
+@return	DB_SUCCESS or err code */
+
+ib_err_t
+ib_trx_start(
+/*=========*/
+	ib_trx_t	ib_trx,		/*!< in: transaction to restart */
+	ib_trx_level_t	ib_trx_level,	/*!< in: trx isolation level */
+	ib_bool_t	read_write,	/*!< in: true if read write
+					transaction */
+	ib_bool_t	auto_commit,	/*!< in: auto commit after each
+					single DML */
+	void*		thd);		/*!< in: THD */
+
+/*****************************************************************//**
+Begin a transaction. This will allocate a new transaction handle and
+put the transaction in the active state.
+@return	innobase txn handle */
+
+ib_trx_t
+ib_trx_begin(
+/*=========*/
+	ib_trx_level_t	ib_trx_level,	/*!< in: trx isolation level */
+	ib_bool_t	read_write,	/*!< in: true if read write
+					transaction */
+	ib_bool_t	auto_commit);	/*!< in: auto commit after each
+					single DML */
+
+/*****************************************************************//**
+Query the transaction's state. This function can be used to check for
+the state of the transaction in case it has been rolled back by the
+InnoDB deadlock detector. Note that when a transaction is selected as
+a victim for rollback, InnoDB will always return an appropriate error
+code indicating this. @see DB_DEADLOCK, @see DB_LOCK_TABLE_FULL and
+@see DB_LOCK_WAIT_TIMEOUT
+@return	transaction state */
+
+ib_trx_state_t
+ib_trx_state(
+/*=========*/
+	ib_trx_t	ib_trx);	/*!< in: trx handle */
+
+/*****************************************************************//**
+Release the resources of the transaction. If the transaction was
+selected as a victim by InnoDB and rolled back then use this function
+to free the transaction handle.
+@return	DB_SUCCESS or err code */
+
+ib_err_t
+ib_trx_release(
+/*===========*/
+	ib_trx_t	ib_trx);	/*!< in: trx handle */
+
+/*****************************************************************//**
+Commit a transaction. This function will release the schema latches too.
+It will also free the transaction handle.
+@return	DB_SUCCESS or err code */
+
+ib_err_t
+ib_trx_commit(
+/*==========*/
+	ib_trx_t	ib_trx);	/*!< in: trx handle */
+
+/*****************************************************************//**
+Rollback a transaction. This function will release the schema latches too.
+It will also free the transaction handle.
+@return	DB_SUCCESS or err code */
+
+ib_err_t
+ib_trx_rollback(
+/*============*/
+	ib_trx_t	ib_trx);	/*!< in: trx handle */
+
+/*****************************************************************//**
+Open an InnoDB table and return a cursor handle to it.
+@return	DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_open_table_using_id(
+/*==========================*/
+	ib_id_u64_t	table_id,	/*!< in: table id of table to open */
+	ib_trx_t	ib_trx,		/*!< in: Current transaction handle
+					can be NULL */
+	ib_crsr_t*	ib_crsr);	/*!< out,own: InnoDB cursor */
+
+/*****************************************************************//**
+Open an InnoDB index and return a cursor handle to it.
+@return	DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_open_index_using_id(
+/*==========================*/
+	ib_id_u64_t	index_id,	/*!< in: index id of index to open */
+	ib_trx_t	ib_trx,		/*!< in: Current transaction handle
+					can be NULL */
+	ib_crsr_t*	ib_crsr);	/*!< out: InnoDB cursor */
+
+/*****************************************************************//**
+Open an InnoDB secondary index cursor and return a cursor handle to it.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_open_index_using_name(
+/*============================*/
+	ib_crsr_t	ib_open_crsr,	/*!< in: open/active cursor */
+	const char*	index_name,	/*!< in: secondary index name */
+	ib_crsr_t*	ib_crsr,	/*!< out,own: InnoDB index cursor */
+	int*		idx_type,	/*!< out: index is cluster index */
+	ib_id_u64_t*	idx_id);	/*!< out: index id */
+
+/*****************************************************************//**
+Open an InnoDB table by name and return a cursor handle to it.
+@return	DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_open_table(
+/*=================*/
+	const char*	name,		/*!< in: table name */
+	ib_trx_t	ib_trx,		/*!< in: Current transaction handle
+					can be NULL */
+	ib_crsr_t*	ib_crsr);	/*!< out,own: InnoDB cursor */
+
+/*****************************************************************//**
+Reset the cursor.
+@return	DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_reset(
+/*============*/
+	ib_crsr_t	ib_crsr);	/*!< in/out: InnoDB cursor */
+
+
+/*****************************************************************//**
+set a cursor trx to NULL*/
+
+void
+ib_cursor_clear_trx(
+/*================*/
+	ib_crsr_t	ib_crsr);	/*!< in/out: InnoDB cursor */
+
+/*****************************************************************//**
+Close an InnoDB table and free the cursor.
+@return	DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_close(
+/*============*/
+	ib_crsr_t	ib_crsr);	/*!< in/out: InnoDB cursor */
+
+/*****************************************************************//**
+Close the table, decrement n_ref_count count.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_close_table(
+/*==================*/
+	ib_crsr_t	ib_crsr);	/*!< in/out: InnoDB cursor */
+
+/*****************************************************************//**
+update the cursor with new transactions and also reset the cursor
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_new_trx(
+/*==============*/
+	ib_crsr_t	ib_crsr,	/*!< in/out: InnoDB cursor */
+	ib_trx_t	ib_trx);	/*!< in: transaction */
+
+/*****************************************************************//**
+Commit the transaction in a cursor
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_commit_trx(
+/*=================*/
+	ib_crsr_t	ib_crsr,	/*!< in/out: InnoDB cursor */
+	ib_trx_t	ib_trx);	/*!< in: transaction */
+
+/********************************************************************//**
+Open a table using the table name, if found then increment table ref count.
+@return table instance if found */
+
+void*
+ib_open_table_by_name(
+/*==================*/
+	const char*	name);		/*!< in: table name to lookup */
+
+/*****************************************************************//**
+Insert a row to a table.
+@return	DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_insert_row(
+/*=================*/
+	ib_crsr_t	ib_crsr,	/*!< in/out: InnoDB cursor instance */
+	const ib_tpl_t	ib_tpl);	/*!< in: tuple to insert */
+
+/*****************************************************************//**
+Update a row in a table.
+@return	DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_update_row(
+/*=================*/
+	ib_crsr_t	ib_crsr,	/*!< in: InnoDB cursor instance */
+	const ib_tpl_t	ib_old_tpl,	/*!< in: Old tuple in table */
+	const ib_tpl_t	ib_new_tpl);	/*!< in: New tuple to update */
+
+/*****************************************************************//**
+Delete a row in a table.
+@return	DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_delete_row(
+/*=================*/
+	ib_crsr_t	ib_crsr);	/*!< in: cursor instance */
+
+/*****************************************************************//**
+Read current row.
+@return	DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_read_row(
+/*===============*/
+	ib_crsr_t	ib_crsr,	/*!< in: InnoDB cursor instance */
+	ib_tpl_t	ib_tpl,		/*!< out: read cols into this tuple */
+	void**		row_buf,	/*!< in/out: row buffer */
+	ib_ulint_t*	row_len);	/*!< in/out: row buffer len */
+
+/*****************************************************************//**
+Move cursor to the first record in the table.
+@return	DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_first(
+/*============*/
+	ib_crsr_t	ib_crsr);	/*!< in: InnoDB cursor instance */
+
+/*****************************************************************//**
+Move cursor to the last record in the table.
+@return	DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_last(
+/*===========*/
+	ib_crsr_t	ib_crsr);	/*!< in: InnoDB cursor instance */
+
+/*****************************************************************//**
+Move cursor to the next record in the table.
+@return	DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_next(
+/*===========*/
+	ib_crsr_t	ib_crsr);	/*!< in: InnoDB cursor instance */
+
+/*****************************************************************//**
+Search for key.
+@return	DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_moveto(
+/*=============*/
+	ib_crsr_t	ib_crsr,	/*!< in: InnoDB cursor instance */
+	ib_tpl_t	ib_tpl,		/*!< in: Key to search for */
+	ib_srch_mode_t	ib_srch_mode);	/*!< in: search mode */
+
+/*****************************************************************//**
+Set the match mode for ib_cursor_moveto(). */
+
+void
+ib_cursor_set_match_mode(
+/*=====================*/
+	ib_crsr_t	ib_crsr,	/*!< in: Cursor instance */
+	ib_match_mode_t	match_mode);	/*!< in: ib_cursor_moveto match mode */
+
+/*****************************************************************//**
+Set a column of the tuple. Make a copy using the tuple's heap.
+@return	DB_SUCCESS or error code */
+
+ib_err_t
+ib_col_set_value(
+/*=============*/
+	ib_tpl_t	ib_tpl,		/*!< in: tuple instance */
+	ib_ulint_t	col_no,		/*!< in: column index in tuple */
+	const void*	src,		/*!< in: data value */
+	ib_ulint_t	len,		/*!< in: data value len */
+	ib_bool_t	need_cpy);	/*!< in: if need memcpy */
+
+
+/*****************************************************************//**
+Get the size of the data available in the given column of the tuple.
+@return	bytes avail or IB_SQL_NULL */
+
+ib_ulint_t
+ib_col_get_len(
+/*===========*/
+	ib_tpl_t	ib_tpl,		/*!< in: tuple instance */
+	ib_ulint_t	i);		/*!< in: column index in tuple */
+
+/*****************************************************************//**
+Copy a column value from the tuple.
+@return	bytes copied or IB_SQL_NULL */
+
+ib_ulint_t
+ib_col_copy_value(
+/*==============*/
+	ib_tpl_t	ib_tpl,		/*!< in: tuple instance */
+	ib_ulint_t	i,		/*!< in: column index in tuple */
+	void*		dst,		/*!< out: copied data value */
+	ib_ulint_t	len);		/*!< in: max data value len to copy */
+
+/*************************************************************//**
+Read a signed int 8 bit column from an InnoDB tuple.
+@return	DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_i8(
+/*=============*/
+	ib_tpl_t	ib_tpl,		/*!< in: InnoDB tuple */
+	ib_ulint_t	i,		/*!< in: column number */
+	ib_i8_t*	ival);		/*!< out: integer value */
+
+/*************************************************************//**
+Read an unsigned int 8 bit column from an InnoDB tuple.
+@return	DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_u8(
+/*=============*/
+	ib_tpl_t	ib_tpl,		/*!< in: InnoDB tuple */
+	ib_ulint_t	i,		/*!< in: column number */
+	ib_u8_t*	ival);		/*!< out: integer value */
+
+/*************************************************************//**
+Read a signed int 16 bit column from an InnoDB tuple.
+@return	DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_i16(
+/*==============*/
+	ib_tpl_t	ib_tpl,		/*!< in: InnoDB tuple */
+	ib_ulint_t	i,		/*!< in: column number */
+	ib_i16_t*	ival);		/*!< out: integer value */
+
+/*************************************************************//**
+Read an unsigned int 16 bit column from an InnoDB tuple.
+@return	DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_u16(
+/*==============*/
+	ib_tpl_t	ib_tpl,		/*!< in: InnoDB tuple */
+	ib_ulint_t	i,		/*!< in: column number */
+	ib_u16_t*	ival);		/*!< out: integer value */
+
+/*************************************************************//**
+Read a signed int 32 bit column from an InnoDB tuple.
+@return	DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_i32(
+/*==============*/
+	ib_tpl_t	ib_tpl,		/*!< in: InnoDB tuple */
+	ib_ulint_t	i,		/*!< in: column number */
+	ib_i32_t*	ival);		/*!< out: integer value */
+
+/*************************************************************//**
+Read an unsigned int 32 bit column from an InnoDB tuple.
+@return	DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_u32(
+/*==============*/
+	ib_tpl_t	ib_tpl,		/*!< in: InnoDB tuple */
+	ib_ulint_t	i,		/*!< in: column number */
+	ib_u32_t*	ival);		/*!< out: integer value */
+
+/*************************************************************//**
+Read a signed int 64 bit column from an InnoDB tuple.
+@return	DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_i64(
+/*==============*/
+	ib_tpl_t	ib_tpl,		/*!< in: InnoDB tuple */
+	ib_ulint_t	i,		/*!< in: column number */
+	ib_i64_t*	ival);		/*!< out: integer value */
+
+/*************************************************************//**
+Read an unsigned int 64 bit column from an InnoDB tuple.
+@return	DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_u64(
+/*==============*/
+	ib_tpl_t	ib_tpl,		/*!< in: InnoDB tuple */
+	ib_ulint_t	i,		/*!< in: column number */
+	ib_u64_t*	ival);		/*!< out: integer value */
+
+/*****************************************************************//**
+Get a column value pointer from the tuple.
+@return	NULL or pointer to buffer */
+
+const void*
+ib_col_get_value(
+/*=============*/
+	ib_tpl_t	ib_tpl,		/*!< in: InnoDB tuple */
+	ib_ulint_t	i);		/*!< in: column number */
+
+/*****************************************************************//**
+Get a column type, length and attributes from the tuple.
+@return	len of column data */
+
+ib_ulint_t
+ib_col_get_meta(
+/*============*/
+	ib_tpl_t	ib_tpl,		/*!< in: InnoDB tuple */
+	ib_ulint_t	i,		/*!< in: column number */
+	ib_col_meta_t*	ib_col_meta);	/*!< out: column meta data */
+
+/*****************************************************************//**
+"Clear" or reset an InnoDB tuple. We free the heap and recreate the tuple.
+@return	new tuple, or NULL */
+
+ib_tpl_t
+ib_tuple_clear(
+/*============*/
+	ib_tpl_t	ib_tpl);	/*!< in: InnoDB tuple */
+
+/*****************************************************************//**
+Create a new cluster key search tuple and copy the contents of  the
+secondary index key tuple columns that refer to the cluster index record
+to the cluster key. It does a deep copy of the column data.
+@return	DB_SUCCESS or error code */
+
+ib_err_t
+ib_tuple_get_cluster_key(
+/*=====================*/
+	ib_crsr_t	ib_crsr,	/*!< in: secondary index cursor */
+	ib_tpl_t*	ib_dst_tpl,	/*!< out,own: destination tuple */
+	const ib_tpl_t	ib_src_tpl);	/*!< in: source tuple */
+
+/*****************************************************************//**
+Copy the contents of  source tuple to destination tuple. The tuples
+must be of the same type and belong to the same table/index.
+@return	DB_SUCCESS or error code */
+
+ib_err_t
+ib_tuple_copy(
+/*==========*/
+	ib_tpl_t	ib_dst_tpl,	/*!< in: destination tuple */
+	const ib_tpl_t	ib_src_tpl);	/*!< in: source tuple */
+
+/*****************************************************************//**
+Create an InnoDB tuple used for index/table search.
+@return tuple for current index */
+
+ib_tpl_t
+ib_sec_search_tuple_create(
+/*=======================*/
+	ib_crsr_t	ib_crsr);	/*!< in: Cursor instance */
+
+/*****************************************************************//**
+Create an InnoDB tuple used for index/table search.
+@return	tuple for current index */
+
+ib_tpl_t
+ib_sec_read_tuple_create(
+/*=====================*/
+	ib_crsr_t	ib_crsr);	/*!< in: Cursor instance */
+
+/*****************************************************************//**
+Create an InnoDB tuple used for table key operations.
+@return	tuple for current table */
+
+ib_tpl_t
+ib_clust_search_tuple_create(
+/*=========================*/
+	ib_crsr_t	ib_crsr);	/*!< in: Cursor instance */
+
+/*****************************************************************//**
+Create an InnoDB tuple for table row operations.
+@return	tuple for current table */
+
+ib_tpl_t
+ib_clust_read_tuple_create(
+/*=======================*/
+	ib_crsr_t	ib_crsr);	/*!< in: Cursor instance */
+
+/*****************************************************************//**
+Return the number of user columns in the tuple definition.
+@return	number of user columns */
+
+ib_ulint_t
+ib_tuple_get_n_user_cols(
+/*=====================*/
+	const ib_tpl_t	ib_tpl);	/*!< in: Tuple for current table */
+
+/*****************************************************************//**
+Return the number of columns in the tuple definition.
+@return	number of columns */
+
+ib_ulint_t
+ib_tuple_get_n_cols(
+/*================*/
+	const ib_tpl_t	ib_tpl);	/*!< in: Tuple for current table */
+
+/*****************************************************************//**
+Destroy an InnoDB tuple. */
+
+void
+ib_tuple_delete(
+/*============*/
+	ib_tpl_t	ib_tpl);	/*!< in,own: Tuple instance to delete */
+
+/*****************************************************************//**
+Truncate a table. The cursor handle will be closed and set to NULL
+on success.
+@return	DB_SUCCESS or error code */
+
+ib_err_t
+ib_cursor_truncate(
+/*===============*/
+	ib_crsr_t*	ib_crsr,	/*!< in/out: cursor for table
+					to truncate */
+	ib_id_u64_t*	table_id);	/*!< out: new table id */
+
+/*****************************************************************//**
+Get a table id.
+@return	DB_SUCCESS if found */
+
+ib_err_t
+ib_table_get_id(
+/*============*/
+	const char*	table_name,	/*!< in: table to find */
+	ib_id_u64_t*	table_id);	/*!< out: table id if found */
+
+/*****************************************************************//**
+Get an index id.
+@return	DB_SUCCESS if found */
+
+ib_err_t
+ib_index_get_id(
+/*============*/
+	const char*	table_name,	/*!< in: find index for this table */
+	const char*	index_name,	/*!< in: index to find */
+	ib_id_u64_t*	index_id);	/*!< out: index id if found */
+
+/*****************************************************************//**
+Check if cursor is positioned.
+@return	IB_TRUE if positioned */
+
+ib_bool_t
+ib_cursor_is_positioned(
+/*====================*/
+	const ib_crsr_t	ib_crsr);	/*!< in: InnoDB cursor instance */
+
+/*****************************************************************//**
+Checks if the data dictionary is latched in exclusive mode by a
+user transaction.
+@return IB_TRUE if exclusive latch */
+
+ib_bool_t
+ib_schema_lock_is_exclusive(
+/*========================*/
+	const ib_trx_t	ib_trx);	/*!< in: transaction */
+
+/*****************************************************************//**
+Lock an InnoDB cursor/table.
+@return	DB_SUCCESS or error code */
+
+ib_err_t
+ib_cursor_lock(
+/*===========*/
+	ib_crsr_t	ib_crsr,	/*!< in/out: InnoDB cursor */
+	ib_lck_mode_t	ib_lck_mode);	/*!< in: InnoDB lock mode */
+
+/*****************************************************************//**
+Lock an InnoDB table using the table id.
+@return	DB_SUCCESS or error code */
+
+ib_err_t
+ib_table_lock(
+/*===========*/
+	ib_trx_t	ib_trx,		/*!< in/out: transaction */
+	ib_id_u64_t	table_id,	/*!< in: table id */
+	ib_lck_mode_t	ib_lck_mode);	/*!< in: InnoDB lock mode */
+
+/*****************************************************************//**
+Set the lock mode of the cursor.
+@return	DB_SUCCESS or error code */
+
+ib_err_t
+ib_cursor_set_lock_mode(
+/*====================*/
+	ib_crsr_t	ib_crsr,	/*!< in/out: InnoDB cursor */
+	ib_lck_mode_t	ib_lck_mode);	/*!< in: InnoDB lock mode */
+
+/*****************************************************************//**
+Set need to access clustered index record flag. */
+
+void
+ib_cursor_set_cluster_access(
+/*=========================*/
+	ib_crsr_t	ib_crsr);	/*!< in/out: InnoDB cursor */
+
+/*****************************************************************//**
+Write an ib_i8_t integer value to a column. Integers are stored in
+big-endian format and will need to be converted from the host format.
+@return	DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_i8(
+/*==============*/
+	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
+	int		col_no,		/*!< in: column number */
+	ib_i8_t		val);		/*!< in: value to write */
+
+/*****************************************************************//**
+Write an ib_i16_t integer value to a column. Integers are stored in
+big-endian format and will need to be converted from the host format.
+@return	DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_i16(
+/*===============*/
+	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
+	int		col_no,		/*!< in: column number */
+	ib_i16_t	val);		/*!< in: value to write */
+
+/*****************************************************************//**
+Write an ib_i32_t integer value to a column. Integers are stored in
+big-endian format and will need to be converted from the host format.
+@return	DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_i32(
+/*===============*/
+	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
+	int		col_no,		/*!< in: column number */
+	ib_i32_t	val);		/*!< in: value to write */
+
+/*****************************************************************//**
+Write an ib_i64_t integer value to a column. Integers are stored in
+big-endian format and will need to be converted from the host format.
+@return	DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_i64(
+/*===============*/
+	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
+	int		col_no,		/*!< in: column number */
+	ib_i64_t	val);		/*!< in: value to write */
+
+/*****************************************************************//**
+Write an ib_u8_t integer value to a column. Integers are stored in
+big-endian format and will need to be converted from the host format.
+@return	DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_u8(
+/*==============*/
+	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
+	int		col_no,		/*!< in: column number */
+	ib_u8_t		val);		/*!< in: value to write */
+
+/*****************************************************************//**
+Write an ib_u16_t integer value to a column. Integers are stored in
+big-endian format and will need to be converted from the host format.
+@return	DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_u16(
+/*===============*/
+	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
+	int		col_no,		/*!< in: column number */
+	ib_u16_t	val);		/*!< in: value to write */
+
+/*****************************************************************//**
+Write an ib_u32_t integer value to a column. Integers are stored in
+big-endian format and will need to be converted from the host format.
+@return	DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_u32(
+/*===============*/
+	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
+	int		col_no,		/*!< in: column number */
+	ib_u32_t	val);		/*!< in: value to write */
+
+/*****************************************************************//**
+Write an ib_u64_t integer value to a column. Integers are stored in
+big-endian format and will need to be converted from the host format.
+@return	DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_u64(
+/*===============*/
+	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
+	int		col_no,		/*!< in: column number */
+	ib_u64_t	val);		/*!< in: value to write */
+
+/*****************************************************************//**
+Inform the cursor that it's the start of an SQL statement. */
+
+void
+ib_cursor_stmt_begin(
+/*=================*/
+	ib_crsr_t	ib_crsr);	/*!< in: cursor */
+
+/*****************************************************************//**
+Write a double value to a column.
+@return	DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_double(
+/*==================*/
+	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
+	int		col_no,		/*!< in: column number */
+	double		val);		/*!< in: value to write */
+
+/*************************************************************//**
+Read a double column value from an InnoDB tuple.
+@return	DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_double(
+/*=================*/
+	ib_tpl_t	ib_tpl,		/*!< in: InnoDB tuple */
+	ib_ulint_t	col_no,		/*!< in: column number */
+	double*		dval);		/*!< out: double value */
+
+/*****************************************************************//**
+Write a float value to a column.
+@return	DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_float(
+/*=================*/
+	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
+	int		col_no,		/*!< in: column number */
+	float		val);		/*!< in: value to write */
+
+/*************************************************************//**
+Read a float value from an InnoDB tuple.
+@return	DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_float(
+/*================*/
+	ib_tpl_t	ib_tpl,		/*!< in: InnoDB tuple */
+	ib_ulint_t	col_no,		/*!< in: column number */
+	float*		fval);		/*!< out: float value */
+
+/*****************************************************************//**
+Get a column name from the cursor.
+@return name of the column */
+
+const char*
+ib_col_get_name(
+/*============*/
+	ib_crsr_t	ib_crsr,	/*!< in: InnoDB cursor instance */
+	ib_ulint_t	i);		/*!< in: column index in tuple */
+
+/*****************************************************************//**
+Get an index field name from the cursor.
+@return name of the field */
+
+const char*
+ib_get_idx_field_name(
+/*==================*/
+	ib_crsr_t	ib_crsr,	/*!< in: InnoDB cursor instance */
+	ib_ulint_t	i);		/*!< in: column index in tuple */
+
+/*****************************************************************//**
+Truncate a table.
+@return DB_SUCCESS or error code */
+
+ib_err_t
+ib_table_truncate(
+/*==============*/
+	const char*	table_name,	/*!< in: table name */
+	ib_id_u64_t*	table_id);	/*!< out: new table id */
+
+/*****************************************************************//**
+Frees a possible InnoDB trx object associated with the current THD.
+@return DB_SUCCESS or error number */
+
+ib_err_t
+ib_close_thd(
+/*=========*/
+	void*		thd);		/*!< in: handle to the MySQL
+					thread of the user whose resources
+					should be freed */
+
+/*****************************************************************//**
+Get the generic configure status. Takes no arguments.
+@return configure status */
+
+int
+ib_cfg_get_cfg(void);
+/*============*/
+
+/*****************************************************************//**
+Increase/decrease the memcached sync count of the table, to sync
+memcached DML with SQL DDLs.
+@return DB_SUCCESS or error number */
+ib_err_t
+ib_cursor_set_memcached_sync(
+/*=========================*/
+	ib_crsr_t	ib_crsr,	/*!< in: cursor */
+	ib_bool_t	flag);		/*!< in: true to increase, false to decrease */
+
+/*****************************************************************//**
+Check whether the table name conforms to our requirements. Currently
+we only do a simple check for the presence of a '/'.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_table_name_check(
+/*================*/
+	const char*	name);		/*!< in: table name to check */
+
+/*****************************************************************//**
+Return isolation configuration set by "innodb_api_trx_level"
+@return trx isolation level (NOTE(review): declared as ib_trx_state_t) */
+
+ib_trx_state_t
+ib_cfg_trx_level(void);
+/*==============*/
+
+/*****************************************************************//**
+Return the configured value for the background commit interval. No arguments.
+@return background commit interval (in seconds) */
+
+ib_ulint_t
+ib_cfg_bk_commit_interval(void);
+/*=======================*/
+
+/*****************************************************************//**
+Get a trx start time.
+@return trx start_time */
+
+ib_u64_t
+ib_trx_get_start_time(
+/*==================*/
+	ib_trx_t	ib_trx);	/*!< in: transaction */
+
+#endif /* api0api_h */
diff --git a/storage/innobase/include/api0misc.h b/storage/innobase/include/api0misc.h
new file mode 100644
index 00000000000..fcd748390d1
--- /dev/null
+++ b/storage/innobase/include/api0misc.h
@@ -0,0 +1,78 @@
+/*****************************************************************************
+
+Copyright (c) 2008, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/api0misc.h
+InnoDB Native API
+
+3/20/2011 Jimmy Yang extracted from Embedded InnoDB
+2008 Created by Sunny Bains
+*******************************************************/
+
+#ifndef api0misc_h
+#define	api0misc_h
+
+#include "univ.i"
+#include "os0file.h"
+#include "que0que.h"
+#include "trx0trx.h"
+
+/** Whether binlog is enabled for applications using InnoDB APIs */
+extern my_bool                  ib_binlog_enabled;
+
+/** Whether MySQL MDL is enabled for applications using InnoDB APIs */
+extern my_bool                  ib_mdl_enabled;
+
+/** Whether InnoDB row lock is disabled for applications using InnoDB APIs */
+extern my_bool                  ib_disable_row_lock;
+
+/** configure value for transaction isolation level */
+extern ulong			ib_trx_level_setting;
+
+/** configure value for background commit interval (in seconds) */
+extern ulong			ib_bk_commit_interval;
+
+/*****************************************************************//**
+Handles user errors and lock waits detected by the database engine.
+@return	TRUE if it was a lock wait and we should continue running
+the query thread */
+UNIV_INTERN
+ibool
+ib_handle_errors(
+/*=============*/
+	dberr_t*	new_err,	/*!< out: possible new error
+					encountered in lock wait, or if
+					no new error, the value of
+					trx->error_state at the entry of this
+					function */
+	trx_t*		trx,		/*!< in: transaction */
+	que_thr_t*	thr,		/*!< in: query thread */
+	trx_savept_t*	savept);	/*!< in: savepoint or NULL */
+
+/*****************************************************************//**
+Sets a lock on a table.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+ib_trx_lock_table_with_retry(
+/*=========================*/
+	trx_t*		trx,		/*!< in/out: transaction */
+	dict_table_t*	table,		/*!< in: table to lock */
+	enum lock_mode	mode);		/*!< in: lock mode */
+
+#endif /* api0misc_h */
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h
new file mode 100644
index 00000000000..305acf7e322
--- /dev/null
+++ b/storage/innobase/include/btr0btr.h
@@ -0,0 +1,773 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/btr0btr.h
+The B-tree
+
+Created 6/2/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef btr0btr_h
+#define btr0btr_h
+
+#include "univ.i"
+
+#include "dict0dict.h"
+#include "data0data.h"
+#include "page0cur.h"
+#include "mtr0mtr.h"
+#include "btr0types.h"
+
+#ifndef UNIV_HOTBACKUP
+/** Maximum record size which can be stored on a page, without using the
+special big record storage structure */
+#define	BTR_PAGE_MAX_REC_SIZE	(UNIV_PAGE_SIZE / 2 - 200)
+
+/** @brief Maximum depth of a B-tree in InnoDB.
+
+Note that this isn't a maximum as such; none of the tree operations
+avoid producing trees bigger than this. It is instead a "max depth
+that other code must work with", useful for e.g.  fixed-size arrays
+that must store some information about each level in a tree. In other
+words: if a B-tree with bigger depth than this is encountered, it is
+not acceptable for it to lead to mysterious memory corruption, but it
+is acceptable for the program to die with a clear assert failure. */
+#define BTR_MAX_LEVELS		100
+
+/** Latching modes for btr_cur_search_to_nth_level(). */
+enum btr_latch_mode {
+	/** Search a record on a leaf page and S-latch it. */
+	BTR_SEARCH_LEAF = RW_S_LATCH,
+	/** (Prepare to) modify a record on a leaf page and X-latch it. */
+	BTR_MODIFY_LEAF	= RW_X_LATCH,
+	/** Obtain no latches. */
+	BTR_NO_LATCHES = RW_NO_LATCH,
+	/** Start modifying the entire B-tree. */
+	BTR_MODIFY_TREE = 33,
+	/** Continue modifying the entire B-tree. */
+	BTR_CONT_MODIFY_TREE = 34,
+	/** Search the previous record. */
+	BTR_SEARCH_PREV = 35,
+	/** Modify the previous record. */
+	BTR_MODIFY_PREV = 36
+};
+
+/* BTR_INSERT, BTR_DELETE and BTR_DELETE_MARK are mutually exclusive. */
+
+/** If this is ORed to btr_latch_mode, it means that the search tuple
+will be inserted to the index, at the searched position.
+When the record is not in the buffer pool, try to use the insert buffer. */
+#define BTR_INSERT		512
+
+/** This flag ORed to btr_latch_mode says that we do the search in query
+optimization */
+#define BTR_ESTIMATE		1024
+
+/** This flag ORed to BTR_INSERT says that we can ignore possible
+UNIQUE definition on secondary indexes when we decide if we can use
+the insert buffer to speed up inserts */
+#define BTR_IGNORE_SEC_UNIQUE	2048
+
+/** Try to delete mark the record at the searched position using the
+insert/delete buffer when the record is not in the buffer pool. */
+#define BTR_DELETE_MARK		4096
+
+/** Try to purge the record at the searched position using the insert/delete
+buffer when the record is not in the buffer pool. */
+#define BTR_DELETE		8192
+
+/** In the case of BTR_SEARCH_LEAF or BTR_MODIFY_LEAF, the caller is
+already holding an S latch on the index tree */
+#define BTR_ALREADY_S_LATCHED	16384
+
+#define BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode)	\
+	((latch_mode) & ~(BTR_INSERT			\
+			  | BTR_DELETE_MARK		\
+			  | BTR_DELETE			\
+			  | BTR_ESTIMATE		\
+			  | BTR_IGNORE_SEC_UNIQUE	\
+			  | BTR_ALREADY_S_LATCHED))
+#endif /* UNIV_HOTBACKUP */
+
+/**************************************************************//**
+Report that an index page is corrupted. */
+UNIV_INTERN
+void
+btr_corruption_report(
+/*==================*/
+	const buf_block_t*	block,	/*!< in: corrupted block */
+	const dict_index_t*	index)	/*!< in: index tree */
+	UNIV_COLD __attribute__((nonnull));
+
+/** Assert that a B-tree page is not corrupted.
+@param block buffer block containing a B-tree page
+@param index the B-tree index */
+#define btr_assert_not_corrupted(block, index)			\
+	if ((ibool) !!page_is_comp(buf_block_get_frame(block))	\
+	    != dict_table_is_comp((index)->table)) {		\
+		btr_corruption_report(block, index);		\
+		ut_error;					\
+	}
+
+#ifndef UNIV_HOTBACKUP
+#ifdef UNIV_BLOB_DEBUG
+# include "ut0rbt.h"
+/** An index->blobs entry for keeping track of off-page column references */
+struct btr_blob_dbg_t
+{
+	unsigned	blob_page_no:32;	/*!< first BLOB page number */
+	unsigned	ref_page_no:32;		/*!< referring page number */
+	unsigned	ref_heap_no:16;		/*!< referring heap number */
+	unsigned	ref_field_no:10;	/*!< referring field number */
+	unsigned	owner:1;		/*!< TRUE if BLOB owner */
+	unsigned	always_owner:1;		/*!< TRUE if always
+						has been the BLOB owner;
+						reset to TRUE on B-tree
+						page splits and merges */
+	unsigned	del:1;			/*!< TRUE if currently
+						delete-marked */
+};
+
+/**************************************************************//**
+Add a reference to an off-page column to the index->blobs map. */
+UNIV_INTERN
+void
+btr_blob_dbg_add_blob(
+/*==================*/
+	const rec_t*	rec,		/*!< in: clustered index record */
+	ulint		field_no,	/*!< in: number of off-page column */
+	ulint		page_no,	/*!< in: start page of the column */
+	dict_index_t*	index,		/*!< in/out: index tree */
+	const char*	ctx)		/*!< in: context (for logging) */
+	__attribute__((nonnull));
+/**************************************************************//**
+Display the references to off-page columns.
+This function is to be called from a debugger,
+for example when a breakpoint on ut_dbg_assertion_failed is hit. */
+UNIV_INTERN
+void
+btr_blob_dbg_print(
+/*===============*/
+	const dict_index_t*	index)	/*!< in: index tree */
+	__attribute__((nonnull));
+/**************************************************************//**
+Check that there are no references to off-page columns from or to
+the given page. Invoked when freeing or clearing a page.
+@return TRUE when no orphan references exist */
+UNIV_INTERN
+ibool
+btr_blob_dbg_is_empty(
+/*==================*/
+	dict_index_t*	index,		/*!< in: index */
+	ulint		page_no)	/*!< in: page number */
+	__attribute__((nonnull, warn_unused_result));
+
+/**************************************************************//**
+Modify the 'deleted' flag of a record. */
+UNIV_INTERN
+void
+btr_blob_dbg_set_deleted_flag(
+/*==========================*/
+	const rec_t*		rec,	/*!< in: record */
+	dict_index_t*		index,	/*!< in/out: index */
+	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
+	ibool			del)	/*!< in: TRUE=deleted, FALSE=exists */
+	__attribute__((nonnull));
+/**************************************************************//**
+Change the ownership of an off-page column. */
+UNIV_INTERN
+void
+btr_blob_dbg_owner(
+/*===============*/
+	const rec_t*		rec,	/*!< in: record */
+	dict_index_t*		index,	/*!< in/out: index */
+	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
+	ulint			i,	/*!< in: ith field in rec */
+	ibool			own)	/*!< in: TRUE=owned, FALSE=disowned */
+	__attribute__((nonnull));
+/** Assert that there are no BLOB references to or from the given page. */
+# define btr_blob_dbg_assert_empty(index, page_no)	\
+	ut_a(btr_blob_dbg_is_empty(index, page_no))
+#else /* UNIV_BLOB_DEBUG */
+# define btr_blob_dbg_add_blob(rec, field_no, page, index, ctx)	((void) 0)
+# define btr_blob_dbg_set_deleted_flag(rec, index, offsets, del)((void) 0)
+# define btr_blob_dbg_owner(rec, index, offsets, i, val)	((void) 0)
+# define btr_blob_dbg_assert_empty(index, page_no)		((void) 0)
+#endif /* UNIV_BLOB_DEBUG */
+
+/**************************************************************//**
+Gets the root node of a tree and x-latches it.
+@return	root page, x-latched */
+UNIV_INTERN
+page_t*
+btr_root_get(
+/*=========*/
+	const dict_index_t*	index,	/*!< in: index tree */
+	mtr_t*			mtr)	/*!< in: mtr */
+	__attribute__((nonnull));
+
+/**************************************************************//**
+Checks and adjusts the root node of a tree during IMPORT TABLESPACE.
+@return error code, or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+btr_root_adjust_on_import(
+/*======================*/
+	const dict_index_t*	index)	/*!< in: index tree */
+	__attribute__((nonnull, warn_unused_result));
+
+/**************************************************************//**
+Gets the height of the B-tree (the level of the root, when the leaf
+level is assumed to be 0). The caller must hold an S or X latch on
+the index.
+@return	tree height (level of the root) */
+UNIV_INTERN
+ulint
+btr_height_get(
+/*===========*/
+	dict_index_t*	index,	/*!< in: index tree */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+	__attribute__((nonnull, warn_unused_result));
+/**************************************************************//**
+Gets a buffer page and declares its latching order level. */
+UNIV_INLINE
+buf_block_t*
+btr_block_get_func(
+/*===============*/
+	ulint		space,		/*!< in: space id */
+	ulint		zip_size,	/*!< in: compressed page size in bytes
+					or 0 for uncompressed pages */
+	ulint		page_no,	/*!< in: page number */
+	ulint		mode,		/*!< in: latch mode */
+	const char*	file,		/*!< in: file name */
+	ulint		line,		/*!< in: line where called */
+# ifdef UNIV_SYNC_DEBUG
+	const dict_index_t*	index,	/*!< in: index tree, may be NULL
+					if it is not an insert buffer tree */
+# endif /* UNIV_SYNC_DEBUG */
+	mtr_t*		mtr);		/*!< in/out: mini-transaction */
+# ifdef UNIV_SYNC_DEBUG
+/** Gets a buffer page and declares its latching order level.
+@param space	tablespace identifier
+@param zip_size	compressed page size in bytes or 0 for uncompressed pages
+@param page_no	page number
+@param mode	latch mode
+@param index	index tree, may be NULL if not the insert buffer tree
+@param mtr	mini-transaction handle
+@return the block descriptor */
+#  define btr_block_get(space,zip_size,page_no,mode,index,mtr)	\
+	btr_block_get_func(space,zip_size,page_no,mode,		\
+			   __FILE__,__LINE__,index,mtr)
+# else /* UNIV_SYNC_DEBUG */
+/** Gets a buffer page and declares its latching order level.
+@param space	tablespace identifier
+@param zip_size	compressed page size in bytes or 0 for uncompressed pages
+@param page_no	page number
+@param mode	latch mode
+@param idx	index tree, may be NULL if not the insert buffer tree
+@param mtr	mini-transaction handle
+@return the block descriptor */
+#  define btr_block_get(space,zip_size,page_no,mode,idx,mtr)		\
+	btr_block_get_func(space,zip_size,page_no,mode,__FILE__,__LINE__,mtr)
+# endif /* UNIV_SYNC_DEBUG */
+/** Gets a buffer page and declares its latching order level.
+@param space	tablespace identifier
+@param zip_size	compressed page size in bytes or 0 for uncompressed pages
+@param page_no	page number
+@param mode	latch mode
+@param idx	index tree, may be NULL if not the insert buffer tree
+@param mtr	mini-transaction handle
+@return the uncompressed page frame */
+# define btr_page_get(space,zip_size,page_no,mode,idx,mtr)		\
+	buf_block_get_frame(btr_block_get(space,zip_size,page_no,mode,idx,mtr))
+#endif /* !UNIV_HOTBACKUP */
+/**************************************************************//**
+Gets the index id field of a page.
+@return	index id */
+UNIV_INLINE
+index_id_t
+btr_page_get_index_id(
+/*==================*/
+	const page_t*	page)	/*!< in: index page */
+	__attribute__((nonnull, pure, warn_unused_result));
+#ifndef UNIV_HOTBACKUP
+/********************************************************//**
+Gets the node level field in an index page.
+@return	level, leaf level == 0 */
+UNIV_INLINE
+ulint
+btr_page_get_level_low(
+/*===================*/
+	const page_t*	page)	/*!< in: index page */
+	__attribute__((nonnull, pure, warn_unused_result));
+#define btr_page_get_level(page, mtr) btr_page_get_level_low(page)
+/********************************************************//**
+Gets the next index page number.
+@return	next page number */
+UNIV_INLINE
+ulint
+btr_page_get_next(
+/*==============*/
+	const page_t*	page,	/*!< in: index page */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle */
+	__attribute__((nonnull, warn_unused_result));
+/********************************************************//**
+Gets the previous index page number.
+@return	prev page number */
+UNIV_INLINE
+ulint
+btr_page_get_prev(
+/*==============*/
+	const page_t*	page,	/*!< in: index page */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle */
+	__attribute__((nonnull, warn_unused_result));
+/*************************************************************//**
+Gets pointer to the previous user record in the tree. It is assumed
+that the caller has appropriate latches on the page and its neighbor.
+@return	previous user record, NULL if there is none */
+UNIV_INTERN
+rec_t*
+btr_get_prev_user_rec(
+/*==================*/
+	rec_t*	rec,	/*!< in: record on leaf level */
+	mtr_t*	mtr)	/*!< in: mtr holding a latch on the page, and if
+			needed, also to the previous page */
+	__attribute__((nonnull, warn_unused_result));
+/*************************************************************//**
+Gets pointer to the next user record in the tree. It is assumed
+that the caller has appropriate latches on the page and its neighbor.
+@return	next user record, NULL if there is none */
+UNIV_INTERN
+rec_t*
+btr_get_next_user_rec(
+/*==================*/
+	rec_t*	rec,	/*!< in: record on leaf level */
+	mtr_t*	mtr)	/*!< in: mtr holding a latch on the page, and if
+			needed, also to the next page */
+	__attribute__((nonnull, warn_unused_result));
+/**************************************************************//**
+Releases the latch on a leaf page and buffer-unfixes it. */
+UNIV_INLINE
+void
+btr_leaf_page_release(
+/*==================*/
+	buf_block_t*	block,		/*!< in: buffer block */
+	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF or
+					BTR_MODIFY_LEAF */
+	mtr_t*		mtr)		/*!< in: mtr */
+	__attribute__((nonnull));
+/**************************************************************//**
+Gets the child node file address in a node pointer.
+NOTE: the offsets array must contain all offsets for the record since
+we read the last field according to offsets and assume that it contains
+the child page number. In other words offsets must have been retrieved
+with rec_get_offsets(n_fields=ULINT_UNDEFINED).
+@return	child node address */
+UNIV_INLINE
+ulint
+btr_node_ptr_get_child_page_no(
+/*===========================*/
+	const rec_t*	rec,	/*!< in: node pointer record */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+	__attribute__((nonnull, pure, warn_unused_result));
+/************************************************************//**
+Creates the root node for a new index tree.
+@return	page number of the created root, FIL_NULL if did not succeed */
+UNIV_INTERN
+ulint
+btr_create(
+/*=======*/
+	ulint		type,	/*!< in: type of the index */
+	ulint		space,	/*!< in: space where created */
+	ulint		zip_size,/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	index_id_t	index_id,/*!< in: index id */
+	dict_index_t*	index,	/*!< in: index */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle */
+	__attribute__((nonnull));
+/************************************************************//**
+Frees a B-tree except the root page, which MUST be freed after this
+by calling btr_free_root. */
+UNIV_INTERN
+void
+btr_free_but_not_root(
+/*==================*/
+	ulint	space,		/*!< in: space where created */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	root_page_no);	/*!< in: root page number */
+/************************************************************//**
+Frees the B-tree root page. The rest of the tree MUST already have been freed. */
+UNIV_INTERN
+void
+btr_free_root(
+/*==========*/
+	ulint	space,		/*!< in: space where created */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	root_page_no,	/*!< in: root page number */
+	mtr_t*	mtr)		/*!< in/out: mini-transaction */
+	__attribute__((nonnull));
+/*************************************************************//**
+Makes tree one level higher by splitting the root, and inserts
+the tuple. It is assumed that mtr contains an x-latch on the tree.
+NOTE that the operation of this function must always succeed,
+we cannot reverse it: therefore enough free disk space must be
+guaranteed to be available before this function is called.
+@return	inserted record */
+UNIV_INTERN
+rec_t*
+btr_root_raise_and_insert(
+/*======================*/
+	ulint		flags,	/*!< in: undo logging and locking flags */
+	btr_cur_t*	cursor,	/*!< in: cursor at which to insert: must be
+				on the root page; when the function returns,
+				the cursor is positioned on the predecessor
+				of the inserted record */
+	ulint**		offsets,/*!< out: offsets on inserted record */
+	mem_heap_t**	heap,	/*!< in/out: pointer to memory heap
+				that can be emptied, or NULL */
+	const dtuple_t*	tuple,	/*!< in: tuple to insert */
+	ulint		n_ext,	/*!< in: number of externally stored columns */
+	mtr_t*		mtr)	/*!< in: mtr */
+	__attribute__((nonnull, warn_unused_result));
+/*************************************************************//**
+Reorganizes an index page.
+
+IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index. This has to
+be done either within the same mini-transaction, or by invoking
+ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages,
+IBUF_BITMAP_FREE is unaffected by reorganization.
+
+@retval true if the operation was successful
+@retval false if it is a compressed page, and recompression failed */
+UNIV_INTERN
+bool
+btr_page_reorganize_low(
+/*====================*/
+	bool		recovery,/*!< in: true if called in recovery:
+				locks should not be updated, i.e.,
+				there cannot exist locks on the
+				page, and a hash index should not be
+				dropped: it cannot exist */
+	ulint		z_level,/*!< in: compression level to be used
+				if dealing with compressed page */
+	page_cur_t*	cursor,	/*!< in/out: page cursor */
+	dict_index_t*	index,	/*!< in: the index tree of the page */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+	__attribute__((nonnull, warn_unused_result));
+/*************************************************************//**
+Reorganizes an index page.
+
+IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index. This has to
+be done either within the same mini-transaction, or by invoking
+ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages,
+IBUF_BITMAP_FREE is unaffected by reorganization.
+
+@retval true if the operation was successful
+@retval false if it is a compressed page, and recompression failed */
+UNIV_INTERN
+bool
+btr_page_reorganize(
+/*================*/
+	page_cur_t*	cursor,	/*!< in/out: page cursor */
+	dict_index_t*	index,	/*!< in: the index tree of the page */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+	__attribute__((nonnull));
+/*************************************************************//**
+Decides if the page should be split at the convergence point of
+inserts converging to left.
+@return	TRUE if split recommended */
+UNIV_INTERN
+ibool
+btr_page_get_split_rec_to_left(
+/*===========================*/
+	btr_cur_t*	cursor,	/*!< in: cursor at which to insert */
+	rec_t**		split_rec)/*!< out: if split recommended,
+				the first record on upper half page,
+				or NULL if tuple should be first */
+	__attribute__((nonnull, warn_unused_result));
+/*************************************************************//**
+Decides if the page should be split at the convergence point of
+inserts converging to right.
+@return	TRUE if split recommended */
+UNIV_INTERN
+ibool
+btr_page_get_split_rec_to_right(
+/*============================*/
+	btr_cur_t*	cursor,	/*!< in: cursor at which to insert */
+	rec_t**		split_rec)/*!< out: if split recommended,
+				the first record on upper half page,
+				or NULL if tuple should be first */
+	__attribute__((nonnull, warn_unused_result));
+/*************************************************************//**
+Splits an index page to halves and inserts the tuple. It is assumed
+that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is
+released within this function! NOTE that the operation of this
+function must always succeed, we cannot reverse it: therefore enough
+free disk space (2 pages) must be guaranteed to be available before
+this function is called.
+
+@return inserted record */
+UNIV_INTERN
+rec_t*
+btr_page_split_and_insert(
+/*======================*/
+	ulint		flags,	/*!< in: undo logging and locking flags */
+	btr_cur_t*	cursor,	/*!< in: cursor at which to insert; when the
+				function returns, the cursor is positioned
+				on the predecessor of the inserted record */
+	ulint**		offsets,/*!< out: offsets on inserted record */
+	mem_heap_t**	heap,	/*!< in/out: pointer to memory heap
+				that can be emptied, or NULL */
+	const dtuple_t*	tuple,	/*!< in: tuple to insert */
+	ulint		n_ext,	/*!< in: number of externally stored columns */
+	mtr_t*		mtr)	/*!< in: mtr */
+	__attribute__((nonnull, warn_unused_result));
+/*******************************************************//**
+Inserts a data tuple to a tree on a non-leaf level. It is assumed
+that mtr holds an x-latch on the tree. */
+UNIV_INTERN
+void
+btr_insert_on_non_leaf_level_func(
+/*==============================*/
+	ulint		flags,	/*!< in: undo logging and locking flags */
+	dict_index_t*	index,	/*!< in: index */
+	ulint		level,	/*!< in: level, must be > 0 */
+	dtuple_t*	tuple,	/*!< in: the record to be inserted */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
+	mtr_t*		mtr)	/*!< in: mtr */
+	__attribute__((nonnull));
+# define btr_insert_on_non_leaf_level(f,i,l,t,m)			\
+	btr_insert_on_non_leaf_level_func(f,i,l,t,__FILE__,__LINE__,m)
+#endif /* !UNIV_HOTBACKUP */
+/****************************************************************//**
+Sets a record as the predefined minimum record. */
+UNIV_INTERN
+void
+btr_set_min_rec_mark(
+/*=================*/
+	rec_t*	rec,	/*!< in/out: record */
+	mtr_t*	mtr)	/*!< in: mtr */
+	__attribute__((nonnull));
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
+Deletes on the upper level the node pointer to a page. */
+UNIV_INTERN
+void
+btr_node_ptr_delete(
+/*================*/
+	dict_index_t*	index,	/*!< in: index tree */
+	buf_block_t*	block,	/*!< in: page whose node pointer is deleted */
+	mtr_t*		mtr)	/*!< in: mtr */
+	__attribute__((nonnull));
+#ifdef UNIV_DEBUG
+/************************************************************//**
+Checks that the node pointer to a page is appropriate.
+@return	TRUE */
+UNIV_INTERN
+ibool
+btr_check_node_ptr(
+/*===============*/
+	dict_index_t*	index,	/*!< in: index tree */
+	buf_block_t*	block,	/*!< in: index page */
+	mtr_t*		mtr)	/*!< in: mtr */
+	__attribute__((nonnull, warn_unused_result));
+#endif /* UNIV_DEBUG */
+/*************************************************************//**
+Tries to merge the page first to the left immediate brother if such a
+brother exists, and the node pointers to the current page and to the
+brother reside on the same page. If the left brother does not satisfy these
+conditions, looks at the right brother. If the page is the only one on that
+level lifts the records of the page to the father page, thus reducing the
+tree height. It is assumed that mtr holds an x-latch on the tree and on the
+page. If cursor is on the leaf level, mtr must also hold x-latches to
+the brothers, if they exist.
+@return	TRUE on success */
+UNIV_INTERN
+ibool
+btr_compress(
+/*=========*/
+	btr_cur_t*	cursor,	/*!< in/out: cursor on the page to merge
+				or lift; the page must not be empty:
+				when deleting records, use btr_discard_page()
+				if the page would become empty */
+	ibool		adjust,	/*!< in: TRUE if should adjust the
+				cursor position even if compression occurs */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+	__attribute__((nonnull));
+/*************************************************************//**
+Discards a page from a B-tree. This is used to remove the last record from
+a B-tree page: the whole page must be removed at the same time. This cannot
+be used for the root page, which is allowed to be empty. */
+UNIV_INTERN
+void
+btr_discard_page(
+/*=============*/
+	btr_cur_t*	cursor,	/*!< in: cursor on the page to discard: not on
+				the root page */
+	mtr_t*		mtr)	/*!< in: mtr */
+	__attribute__((nonnull));
+#endif /* !UNIV_HOTBACKUP */
+/****************************************************************//**
+Parses the redo log record for setting an index record as the predefined
+minimum record.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+btr_parse_set_min_rec_mark(
+/*=======================*/
+	byte*	ptr,	/*!< in: buffer */
+	byte*	end_ptr,/*!< in: buffer end */
+	ulint	comp,	/*!< in: nonzero=compact page format */
+	page_t*	page,	/*!< in: page or NULL */
+	mtr_t*	mtr)	/*!< in: mtr or NULL */
+	__attribute__((nonnull(1,2), warn_unused_result));
+/***********************************************************//**
+Parses a redo log record of reorganizing a page.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+btr_parse_page_reorganize(
+/*======================*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	bool		compressed,/*!< in: true if compressed page */
+	buf_block_t*	block,	/*!< in: page to be reorganized, or NULL */
+	mtr_t*		mtr)	/*!< in: mtr or NULL */
+	__attribute__((nonnull(1,2,3), warn_unused_result));
+#ifndef UNIV_HOTBACKUP
+/**************************************************************//**
+Gets the number of pages in a B-tree.
+@return	number of pages, or ULINT_UNDEFINED if the index is unavailable */
+UNIV_INTERN
+ulint
+btr_get_size(
+/*=========*/
+	dict_index_t*	index,	/*!< in: index */
+	ulint		flag,	/*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction where index
+				is s-latched */
+	__attribute__((nonnull, warn_unused_result));
+/**************************************************************//**
+Allocates a new file page to be used in an index tree. NOTE: we assume
+that the caller has made the reservation for free extents!
+@retval NULL if no page could be allocated
+@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
+(init_mtr == mtr, or the page was not previously freed in mtr)
+@retval block (not allocated or initialized) otherwise */
+UNIV_INTERN
+buf_block_t*
+btr_page_alloc(
+/*===========*/
+	dict_index_t*	index,		/*!< in: index tree */
+	ulint		hint_page_no,	/*!< in: hint of a good page */
+	byte		file_direction,	/*!< in: direction where a possible
+					page split is made */
+	ulint		level,		/*!< in: level where the page is placed
+					in the tree */
+	mtr_t*		mtr,		/*!< in/out: mini-transaction
+					for the allocation */
+	mtr_t*		init_mtr)	/*!< in/out: mini-transaction
+					for x-latching and initializing
+					the page */
+	__attribute__((nonnull, warn_unused_result));
+/**************************************************************//**
+Frees a file page used in an index tree. NOTE: cannot free field external
+storage pages because the page must contain info on its level. */
+UNIV_INTERN
+void
+btr_page_free(
+/*==========*/
+	dict_index_t*	index,	/*!< in: index tree */
+	buf_block_t*	block,	/*!< in: block to be freed, x-latched */
+	mtr_t*		mtr)	/*!< in: mtr */
+	__attribute__((nonnull));
+/**************************************************************//**
+Frees a file page used in an index tree. Can also be used to free BLOB
+external storage pages, because the page level 0 can be given as an
+argument. */
+UNIV_INTERN
+void
+btr_page_free_low(
+/*==============*/
+	dict_index_t*	index,	/*!< in: index tree */
+	buf_block_t*	block,	/*!< in: block to be freed, x-latched */
+	ulint		level,	/*!< in: page level */
+	mtr_t*		mtr)	/*!< in: mtr */
+	__attribute__((nonnull));
+#ifdef UNIV_BTR_PRINT
+/*************************************************************//**
+Prints size info of a B-tree. */
+UNIV_INTERN
+void
+btr_print_size(
+/*===========*/
+	dict_index_t*	index)	/*!< in: index tree */
+	__attribute__((nonnull));
+/**************************************************************//**
+Prints directories and other info of all nodes in the index. */
+UNIV_INTERN
+void
+btr_print_index(
+/*============*/
+	dict_index_t*	index,	/*!< in: index */
+	ulint		width)	/*!< in: print this many entries from start
+				and end */
+	__attribute__((nonnull));
+#endif /* UNIV_BTR_PRINT */
+/************************************************************//**
+Checks the size and number of fields in a record based on the definition of
+the index.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+btr_index_rec_validate(
+/*===================*/
+	const rec_t*		rec,		/*!< in: index record */
+	const dict_index_t*	index,		/*!< in: index */
+	ibool			dump_on_error)	/*!< in: TRUE if the function
+						should print hex dump of record
+						and page on error */
+	__attribute__((nonnull, warn_unused_result));
+/**************************************************************//**
+Checks the consistency of an index tree.
+@return	true if ok */
+UNIV_INTERN
+bool
+btr_validate_index(
+/*===============*/
+	dict_index_t*	index,			/*!< in: index */
+	const trx_t*	trx)			/*!< in: transaction or 0 */
+	__attribute__((nonnull(1), warn_unused_result));
+
+#define BTR_N_LEAF_PAGES	1
+#define BTR_TOTAL_SIZE		2
+#endif /* !UNIV_HOTBACKUP */
+
+#ifndef UNIV_NONINL
+#include "btr0btr.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/btr0btr.ic b/storage/innobase/include/btr0btr.ic
new file mode 100644
index 00000000000..00f50b5dcaf
--- /dev/null
+++ b/storage/innobase/include/btr0btr.ic
@@ -0,0 +1,290 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/btr0btr.ic
+The B-tree
+
+Created 6/2/1994 Heikki Tuuri
+*******************************************************/
+
+#include "mach0data.h"
+#ifndef UNIV_HOTBACKUP
+#include "mtr0mtr.h"
+#include "mtr0log.h"
+#include "page0zip.h"
+
+#define BTR_MAX_NODE_LEVEL	50	/*!< Maximum B-tree page level
+					(not really a hard limit).
+					Used in debug assertions
+					in btr_page_set_level and
+					btr_page_get_level_low */
+
+/**************************************************************//**
+Gets a buffer page in BUF_GET mode and, unless mode is RW_NO_LATCH,
+declares its latching order level. */
+UNIV_INLINE
+buf_block_t*
+btr_block_get_func(
+/*===============*/
+	ulint		space,		/*!< in: space id */
+	ulint		zip_size,	/*!< in: compressed page size in bytes
+					or 0 for uncompressed pages */
+	ulint		page_no,	/*!< in: page number */
+	ulint		mode,		/*!< in: latch mode */
+	const char*	file,		/*!< in: file name */
+	ulint		line,		/*!< in: line where called */
+#ifdef UNIV_SYNC_DEBUG
+	const dict_index_t*	index,	/*!< in: index tree, may be NULL
+					if it is not an insert buffer tree */
+#endif /* UNIV_SYNC_DEBUG */
+	mtr_t*		mtr)		/*!< in/out: mtr */
+{
+	buf_block_t*	block = buf_page_get_gen(
+		space, zip_size, page_no, mode,
+		NULL, BUF_GET, file, line, mtr);
+
+	if (mode == RW_NO_LATCH) {
+		return(block);
+	}
+	/* "index" is declared only #ifdef UNIV_SYNC_DEBUG; keep it here. */
+	buf_block_dbg_add_level(
+		block, index != NULL && dict_index_is_ibuf(index)
+		? SYNC_IBUF_TREE_NODE : SYNC_TREE_NODE);
+	return(block);
+}
+
+/**************************************************************//**
+Stores the index id in the PAGE_INDEX_ID header field of a page. */
+UNIV_INLINE
+void
+btr_page_set_index_id(
+/*==================*/
+	page_t*		page,	/*!< in: page to be created */
+	page_zip_des_t*	page_zip,/*!< in: compressed page whose uncompressed
+				part will be updated, or NULL */
+	index_id_t	id,	/*!< in: index id */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	page_t*	field = page + (PAGE_HEADER + PAGE_INDEX_ID);
+
+	if (page_zip == NULL) {
+		mlog_write_ull(field, id, mtr);
+		return;
+	}
+	mach_write_to_8(field, id);
+	page_zip_write_header(page_zip, field, 8, mtr);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/**************************************************************//**
+Reads the index id field from the header of a page.
+@return	index id */
+UNIV_INLINE
+index_id_t
+btr_page_get_index_id(
+/*==================*/
+	const page_t*	page)	/*!< in: index page */
+{
+	return(mach_read_from_8(page + (PAGE_HEADER + PAGE_INDEX_ID)));
+}
+
+#ifndef UNIV_HOTBACKUP
+/********************************************************//**
+Reads the node level field from the header of an index page.
+@return	level, leaf level == 0 */
+UNIV_INLINE
+ulint
+btr_page_get_level_low(
+/*===================*/
+	const page_t*	page)	/*!< in: index page */
+{
+	ut_ad(page);
+
+	const ulint	level = mach_read_from_2(
+		page + (PAGE_HEADER + PAGE_LEVEL));
+
+	/* Sanity check against the soft limit BTR_MAX_NODE_LEVEL. */
+	ut_ad(level <= BTR_MAX_NODE_LEVEL);
+
+	return(level);
+}
+
+/********************************************************//**
+Stores the node level in the PAGE_LEVEL header field of an index page. */
+UNIV_INLINE
+void
+btr_page_set_level(
+/*===============*/
+	page_t*		page,	/*!< in: index page */
+	page_zip_des_t*	page_zip,/*!< in: compressed page whose uncompressed
+				part will be updated, or NULL */
+	ulint		level,	/*!< in: level, leaf level == 0 */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle */
+{
+	ut_ad(page && mtr);
+	ut_ad(level <= BTR_MAX_NODE_LEVEL);
+
+	page_t*	field = page + (PAGE_HEADER + PAGE_LEVEL);
+
+	if (page_zip == NULL) {
+		mlog_write_ulint(field, level, MLOG_2BYTES, mtr);
+		return;
+	}
+
+	mach_write_to_2(field, level);
+	page_zip_write_header(page_zip, field, 2, mtr);
+}
+
+/********************************************************//**
+Reads the FIL_PAGE_NEXT field, i.e. the next index page number.
+@return	next page number */
+UNIV_INLINE
+ulint
+btr_page_get_next(
+/*==============*/
+	const page_t*	page,	/*!< in: index page */
+	mtr_t*	mtr __attribute__((unused))) /*!< in: mini-transaction handle */
+{
+	ut_ad(page && mtr);
+	ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX)
+	      || mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_S_FIX));
+
+	const ulint	next_page_no = mach_read_from_4(page + FIL_PAGE_NEXT);
+	return(next_page_no);
+}
+
+/********************************************************//**
+Stores the next index page number in the FIL_PAGE_NEXT field. */
+UNIV_INLINE
+void
+btr_page_set_next(
+/*==============*/
+	page_t*		page,	/*!< in: index page */
+	page_zip_des_t*	page_zip,/*!< in: compressed page whose uncompressed
+				part will be updated, or NULL */
+	ulint		next,	/*!< in: next page number */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle */
+{
+	ut_ad(page && mtr);
+
+	if (page_zip == NULL) {
+		mlog_write_ulint(page + FIL_PAGE_NEXT, next, MLOG_4BYTES, mtr);
+		return;
+	}
+	mach_write_to_4(page + FIL_PAGE_NEXT, next);
+	page_zip_write_header(page_zip, page + FIL_PAGE_NEXT, 4, mtr);
+}
+
+/********************************************************//**
+Reads the FIL_PAGE_PREV field, i.e. the previous index page number.
+@return	prev page number */
+UNIV_INLINE
+ulint
+btr_page_get_prev(
+/*==============*/
+	const page_t*	page,	/*!< in: index page */
+	mtr_t*	mtr __attribute__((unused))) /*!< in: mini-transaction handle */
+{
+	ut_ad(page && mtr);
+	const ulint	prev_page_no = mach_read_from_4(page + FIL_PAGE_PREV);
+	return(prev_page_no);
+}
+
+/********************************************************//**
+Stores the previous index page number in the FIL_PAGE_PREV field. */
+UNIV_INLINE
+void
+btr_page_set_prev(
+/*==============*/
+	page_t*		page,	/*!< in: index page */
+	page_zip_des_t*	page_zip,/*!< in: compressed page whose uncompressed
+				part will be updated, or NULL */
+	ulint		prev,	/*!< in: previous page number */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle */
+{
+	ut_ad(page && mtr);
+
+	if (page_zip == NULL) {
+		mlog_write_ulint(page + FIL_PAGE_PREV, prev, MLOG_4BYTES, mtr);
+		return;
+	}
+	mach_write_to_4(page + FIL_PAGE_PREV, prev);
+	page_zip_write_header(page_zip, page + FIL_PAGE_PREV, 4, mtr);
+}
+
+/**************************************************************//**
+Reads the child page number stored in a node pointer record.
+NOTE: the child page number is taken from the last field as described
+by offsets, so the offsets array must cover every field of the record;
+that is, it must have been obtained with
+rec_get_offsets(n_fields=ULINT_UNDEFINED).
+@return	child page number */
+UNIV_INLINE
+ulint
+btr_node_ptr_get_child_page_no(
+/*===========================*/
+	const rec_t*	rec,	/*!< in: node pointer record */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+{
+	const byte*	child_field;
+	ulint		len;
+
+	ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec));
+
+	/* The last field of a node pointer holds the child page number. */
+	child_field = rec_get_nth_field(rec, offsets,
+					rec_offs_n_fields(offsets) - 1, &len);
+
+	ut_ad(len == 4);
+
+	const ulint	child_page_no = mach_read_from_4(child_field);
+
+	if (child_page_no != 0) {
+		return(child_page_no);
+	}
+	/* Page number 0 is nonsensical here: report it and dump the page. */
+	fprintf(stderr,
+		"InnoDB: a nonsensical page number 0"
+		" in a node ptr record at offset %lu\n",
+		(ulong) page_offset(rec));
+	buf_page_print(page_align(rec), 0, 0);
+	ut_ad(0);
+	return(child_page_no);
+}
+
+/**************************************************************//**
+Releases the latch on a leaf page and buffer-unfixes it. */
+UNIV_INLINE
+void
+btr_leaf_page_release(
+/*==================*/
+	buf_block_t*	block,		/*!< in: buffer block */
+	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF or
+					BTR_MODIFY_LEAF */
+	mtr_t*		mtr)		/*!< in: mtr */
+{
+	ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF);
+	ut_ad(!mtr_memo_contains(mtr, block, MTR_MEMO_MODIFY));
+	if (latch_mode == BTR_SEARCH_LEAF) {
+		mtr_memo_release(mtr, block, MTR_MEMO_PAGE_S_FIX);
+	} else {
+		mtr_memo_release(mtr, block, MTR_MEMO_PAGE_X_FIX);
+	}
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h
new file mode 100644
index 00000000000..f1e4406fcf7
--- /dev/null
+++ b/storage/innobase/include/btr0cur.h
@@ -0,0 +1,937 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/btr0cur.h
+The index tree cursor
+
+Created 10/16/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef btr0cur_h
+#define btr0cur_h
+
+#include "univ.i"
+#include "dict0dict.h"
+#include "page0cur.h"
+#include "btr0types.h"
+
+/** Mode flags for btr_cur operations; distinct bits that can be ORed */
+enum {
+	/** do not perform undo logging */
+	BTR_NO_UNDO_LOG_FLAG = 1,
+	/** do not perform record lock checking */
+	BTR_NO_LOCKING_FLAG = 2,
+	/** the system fields will be found in the update vector or in
+	the inserted entry */
+	BTR_KEEP_SYS_FLAG = 4,
+	/** btr_cur_pessimistic_update() must keep the cursor position
+	when moving columns to big_rec */
+	BTR_KEEP_POS_FLAG = 8,
+	/** the caller is creating the index or wants to bypass the
+	index->info.online creation log */
+	BTR_CREATE_FLAG = 16,
+	/** the caller of btr_cur_optimistic_update() or
+	btr_cur_update_in_place() will take care of
+	updating IBUF_BITMAP_FREE */
+	BTR_KEEP_IBUF_BITMAP = 32
+};
+
+#ifndef UNIV_HOTBACKUP
+#include "que0types.h"
+#include "row0types.h"
+#include "ha0ha.h"
+
+#define BTR_CUR_ADAPT
+#define BTR_CUR_HASH_ADAPT
+
+#ifdef UNIV_DEBUG
+/*********************************************************//**
+Returns the page cursor component of a tree cursor.
+@return	pointer to page cursor component */
+UNIV_INLINE
+page_cur_t*
+btr_cur_get_page_cur(
+/*=================*/
+	const btr_cur_t*	cursor);/*!< in: tree cursor */
+/*********************************************************//**
+Returns the buffer block on which the tree cursor is positioned.
+@return	pointer to buffer block */
+UNIV_INLINE
+buf_block_t*
+btr_cur_get_block(
+/*==============*/
+	const btr_cur_t*	cursor);/*!< in: tree cursor */
+/*********************************************************//**
+Returns the record pointer of a tree cursor.
+@return	pointer to record */
+UNIV_INLINE
+rec_t*
+btr_cur_get_rec(
+/*============*/
+	const btr_cur_t*	cursor);/*!< in: tree cursor */
+#else /* UNIV_DEBUG */
+# define btr_cur_get_page_cur(cursor)	(&(cursor)->page_cur)
+# define btr_cur_get_block(cursor)	((cursor)->page_cur.block)
+# define btr_cur_get_rec(cursor)	((cursor)->page_cur.rec)
+#endif /* UNIV_DEBUG */
+/*********************************************************//**
+Returns the compressed page on which the tree cursor is positioned.
+@return	pointer to compressed page, or NULL if the page is not compressed */
+UNIV_INLINE
+page_zip_des_t*
+btr_cur_get_page_zip(
+/*=================*/
+	btr_cur_t*	cursor);/*!< in: tree cursor */
+/*********************************************************//**
+Invalidates a tree cursor by setting record pointer to NULL. */
+UNIV_INLINE
+void
+btr_cur_invalidate(
+/*===============*/
+	btr_cur_t*	cursor);/*!< in: tree cursor */
+/*********************************************************//**
+Returns the page of a tree cursor.
+@return	pointer to page */
+UNIV_INLINE
+page_t*
+btr_cur_get_page(
+/*=============*/
+	btr_cur_t*	cursor);/*!< in: tree cursor */
+/*********************************************************//**
+Returns the index of a cursor.
+@param cursor	b-tree cursor
+@return	index */
+#define btr_cur_get_index(cursor) ((cursor)->index)
+/*********************************************************//**
+Positions a tree cursor at a given record. */
+UNIV_INLINE
+void
+btr_cur_position(
+/*=============*/
+	dict_index_t*	index,	/*!< in: index */
+	rec_t*		rec,	/*!< in: record in tree */
+	buf_block_t*	block,	/*!< in: buffer block of rec */
+	btr_cur_t*	cursor);/*!< in: cursor */
+/********************************************************************//**
+Searches an index tree and positions a tree cursor on a given level.
+NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
+to node pointer page number fields on the upper levels of the tree!
+Note that if mode is PAGE_CUR_LE, which is used in inserts, then
+cursor->up_match and cursor->low_match both will have sensible values.
+If mode is PAGE_CUR_GE, then up_match will have a sensible value. */
+UNIV_INTERN
+void
+btr_cur_search_to_nth_level(
+/*========================*/
+	dict_index_t*	index,	/*!< in: index */
+	ulint		level,	/*!< in: the tree level of search */
+	const dtuple_t*	tuple,	/*!< in: data tuple; NOTE: n_fields_cmp in
+				tuple must be set so that it cannot get
+				compared to the node ptr page number field! */
+	ulint		mode,	/*!< in: PAGE_CUR_L, ...;
+				NOTE that if the search is made using a unique
+				prefix of a record, mode should be PAGE_CUR_LE,
+				not PAGE_CUR_GE, as the latter may end up on
+				the previous page of the record! Inserts
+				should always be made using PAGE_CUR_LE to
+				search the position! */
+	ulint		latch_mode, /*!< in: BTR_SEARCH_LEAF, ..., ORed with
+				at most one of BTR_INSERT, BTR_DELETE_MARK,
+				BTR_DELETE, or BTR_ESTIMATE;
+				cursor->left_block is used to store a pointer
+				to the left neighbor page, in the cases
+				BTR_SEARCH_PREV and BTR_MODIFY_PREV;
+				NOTE that if has_search_latch
+				is != 0, we may not have a latch set
+				on the cursor page; we assume
+				the caller uses its search latch
+				to protect the record! */
+	btr_cur_t*	cursor, /*!< in/out: tree cursor; the cursor page is
+				s- or x-latched, but see also above! */
+	ulint		has_search_latch,/*!< in: latch mode the caller
+				currently has on btr_search_latch:
+				RW_S_LATCH, or 0 */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
+	mtr_t*		mtr);	/*!< in: mtr */
+/*****************************************************************//**
+Opens a cursor at either end of an index. */
+UNIV_INTERN
+void
+btr_cur_open_at_index_side_func(
+/*============================*/
+	bool		from_left,	/*!< in: true if open to the low end,
+					false if to the high end */
+	dict_index_t*	index,		/*!< in: index */
+	ulint		latch_mode,	/*!< in: latch mode */
+	btr_cur_t*	cursor,		/*!< in/out: cursor */
+	ulint		level,		/*!< in: level to search for
+					(0=leaf) */
+	const char*	file,		/*!< in: file name */
+	ulint		line,		/*!< in: line where called */
+	mtr_t*		mtr)		/*!< in/out: mini-transaction */
+	__attribute__((nonnull));
+#define btr_cur_open_at_index_side(f,i,l,c,lv,m)			\
+	btr_cur_open_at_index_side_func(f,i,l,c,lv,__FILE__,__LINE__,m)
+/**********************************************************************//**
+Positions a cursor at a randomly chosen position within a B-tree. */
+UNIV_INTERN
+void
+btr_cur_open_at_rnd_pos_func(
+/*=========================*/
+	dict_index_t*	index,		/*!< in: index */
+	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ... */
+	btr_cur_t*	cursor,		/*!< in/out: B-tree cursor */
+	const char*	file,		/*!< in: file name */
+	ulint		line,		/*!< in: line where called */
+	mtr_t*		mtr);		/*!< in: mtr */
+#define btr_cur_open_at_rnd_pos(i,l,c,m)				\
+	btr_cur_open_at_rnd_pos_func(i,l,c,__FILE__,__LINE__,m)
+/*************************************************************//**
+Tries to perform an insert to a page in an index tree, next to cursor.
+It is assumed that mtr holds an x-latch on the page. The operation does
+not succeed if there is too little space on the page. If there is just
+one record on the page, the insert will always succeed; this is to
+prevent trying to split a page with just one record.
+@return	DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
+UNIV_INTERN
+dberr_t
+btr_cur_optimistic_insert(
+/*======================*/
+	ulint		flags,	/*!< in: undo logging and locking flags: if not
+				zero, the parameters index and thr should be
+				specified */
+	btr_cur_t*	cursor,	/*!< in: cursor on page after which to insert;
+				cursor stays valid */
+	ulint**		offsets,/*!< out: offsets on *rec */
+	mem_heap_t**	heap,	/*!< in/out: pointer to memory heap, or NULL */
+	dtuple_t*	entry,	/*!< in/out: entry to insert */
+	rec_t**		rec,	/*!< out: pointer to inserted record if
+				succeed */
+	big_rec_t**	big_rec,/*!< out: big rec vector whose fields have to
+				be stored externally by the caller, or
+				NULL */
+	ulint		n_ext,	/*!< in: number of externally stored columns */
+	que_thr_t*	thr,	/*!< in: query thread or NULL */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction;
+				if this function returns DB_SUCCESS on
+				a leaf page of a secondary index in a
+				compressed tablespace, the caller must
+				mtr_commit(mtr) before latching
+				any further pages */
+	__attribute__((nonnull(2,3,4,5,6,7,10), warn_unused_result));
+/*************************************************************//**
+Performs an insert on a page of an index tree. It is assumed that mtr
+holds an x-latch on the tree and on the cursor page. If the insert is
+made on the leaf level, to avoid deadlocks, mtr must also own x-latches
+to brothers of page, if those brothers exist.
+@return	DB_SUCCESS or error number */
+UNIV_INTERN
+dberr_t
+btr_cur_pessimistic_insert(
+/*=======================*/
+	ulint		flags,	/*!< in: undo logging and locking flags: if not
+				zero, the parameter thr should be
+				specified; if no undo logging is specified,
+				then the caller must have reserved enough
+				free extents in the file space so that the
+				insertion will certainly succeed */
+	btr_cur_t*	cursor,	/*!< in: cursor after which to insert;
+				cursor stays valid */
+	ulint**		offsets,/*!< out: offsets on *rec */
+	mem_heap_t**	heap,	/*!< in/out: pointer to memory heap
+				that can be emptied, or NULL */
+	dtuple_t*	entry,	/*!< in/out: entry to insert */
+	rec_t**		rec,	/*!< out: pointer to inserted record if
+				succeed */
+	big_rec_t**	big_rec,/*!< out: big rec vector whose fields have to
+				be stored externally by the caller, or
+				NULL */
+	ulint		n_ext,	/*!< in: number of externally stored columns */
+	que_thr_t*	thr,	/*!< in: query thread or NULL */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+	__attribute__((nonnull(2,3,4,5,6,7,10), warn_unused_result));
+/*************************************************************//**
+See if there is enough space in the page modification log to log
+an update-in-place.
+
+@retval false if out of space; IBUF_BITMAP_FREE will be reset
+outside mtr if the page was recompressed
+@retval	true if there is enough space
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE if this is
+a secondary index leaf page. This has to be done either within the
+same mini-transaction, or by invoking ibuf_reset_free_bits() before
+mtr_commit(mtr). */
+UNIV_INTERN
+bool
+btr_cur_update_alloc_zip_func(
+/*==========================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	page_cur_t*	cursor,	/*!< in/out: B-tree page cursor */
+	dict_index_t*	index,	/*!< in: the index corresponding to cursor */
+#ifdef UNIV_DEBUG
+	ulint*		offsets,/*!< in/out: offsets of the cursor record */
+#endif /* UNIV_DEBUG */
+	ulint		length,	/*!< in: size needed */
+	bool		create,	/*!< in: true=delete-and-insert,
+				false=update-in-place */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+	__attribute__((nonnull, warn_unused_result));
+#ifdef UNIV_DEBUG
+# define btr_cur_update_alloc_zip(page_zip,cursor,index,offsets,len,cr,mtr) \
+	btr_cur_update_alloc_zip_func(page_zip,cursor,index,offsets,len,cr,mtr)
+#else /* UNIV_DEBUG */
+# define btr_cur_update_alloc_zip(page_zip,cursor,index,offsets,len,cr,mtr) \
+	btr_cur_update_alloc_zip_func(page_zip,cursor,index,len,cr,mtr)
+#endif /* UNIV_DEBUG */
+/*************************************************************//**
+Updates a record when the update causes no size changes in its fields.
+@return locking or undo log related error code, or
+@retval DB_SUCCESS on success
+@retval DB_ZIP_OVERFLOW if there is not enough space left
+on the compressed page (IBUF_BITMAP_FREE was reset outside mtr) */
+UNIV_INTERN
+dberr_t
+btr_cur_update_in_place(
+/*====================*/
+	ulint		flags,	/*!< in: undo logging and locking flags */
+	btr_cur_t*	cursor,	/*!< in: cursor on the record to update;
+				cursor stays valid and positioned on the
+				same record */
+	ulint*		offsets,/*!< in/out: offsets on cursor->page_cur.rec */
+	const upd_t*	update,	/*!< in: update vector */
+	ulint		cmpl_info,/*!< in: compiler info on secondary index
+				updates */
+	que_thr_t*	thr,	/*!< in: query thread */
+	trx_id_t	trx_id,	/*!< in: transaction id */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction; if this
+				is a secondary index, the caller must
+				mtr_commit(mtr) before latching any
+				further pages */
+	__attribute__((warn_unused_result, nonnull));
+/***********************************************************//**
+Writes a redo log record of updating a record in-place. */
+UNIV_INTERN
+void
+btr_cur_update_in_place_log(
+/*========================*/
+	ulint		flags,		/*!< in: flags */
+	const rec_t*	rec,		/*!< in: record */
+	dict_index_t*	index,		/*!< in: index of the record */
+	const upd_t*	update,		/*!< in: update vector */
+	trx_id_t	trx_id,		/*!< in: transaction id */
+	roll_ptr_t	roll_ptr,	/*!< in: roll ptr */
+	mtr_t*		mtr)		/*!< in: mtr */
+	__attribute__((nonnull));
+/*************************************************************//**
+Tries to update a record on a page in an index tree. It is assumed that mtr
+holds an x-latch on the page. The operation does not succeed if there is too
+little space on the page or if the update would result in too empty a page,
+so that tree compression is recommended.
+@return error code, including
+@retval DB_SUCCESS on success
+@retval DB_OVERFLOW if the updated record does not fit
+@retval DB_UNDERFLOW if the page would become too empty
+@retval DB_ZIP_OVERFLOW if there is not enough space left
+on the compressed page */
+UNIV_INTERN
+dberr_t
+btr_cur_optimistic_update(
+/*======================*/
+	ulint		flags,	/*!< in: undo logging and locking flags */
+	btr_cur_t*	cursor,	/*!< in: cursor on the record to update;
+				cursor stays valid and positioned on the
+				same record */
+	ulint**		offsets,/*!< out: offsets on cursor->page_cur.rec */
+	mem_heap_t**	heap,	/*!< in/out: pointer to NULL or memory heap */
+	const upd_t*	update,	/*!< in: update vector; this must also
+				contain trx id and roll ptr fields */
+	ulint		cmpl_info,/*!< in: compiler info on secondary index
+				updates */
+	que_thr_t*	thr,	/*!< in: query thread */
+	trx_id_t	trx_id,	/*!< in: transaction id */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction; if this
+				is a secondary index, the caller must
+				mtr_commit(mtr) before latching any
+				further pages */
+	__attribute__((warn_unused_result, nonnull));
+/*************************************************************//**
+Performs an update of a record on a page of a tree. It is assumed
+that mtr holds an x-latch on the tree and on the cursor page. If the
+update is made on the leaf level, to avoid deadlocks, mtr must also
+own x-latches to brothers of page, if those brothers exist.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+btr_cur_pessimistic_update(
+/*=======================*/
+	ulint		flags,	/*!< in: undo logging, locking, and rollback
+				flags */
+	btr_cur_t*	cursor,	/*!< in/out: cursor on the record to update;
+				cursor may become invalid if *big_rec == NULL
+				|| !(flags & BTR_KEEP_POS_FLAG) */
+	ulint**		offsets,/*!< out: offsets on cursor->page_cur.rec */
+	mem_heap_t**	offsets_heap,
+				/*!< in/out: pointer to memory heap
+				that can be emptied, or NULL */
+	mem_heap_t*	entry_heap,
+				/*!< in/out: memory heap for allocating
+				big_rec and the index tuple */
+	big_rec_t**	big_rec,/*!< out: big rec vector whose fields have to
+				be stored externally by the caller, or NULL */
+	const upd_t*	update,	/*!< in: update vector; this is allowed to
+				also contain trx id and roll ptr fields, but
+				the values in update vector have no effect */
+	ulint		cmpl_info,/*!< in: compiler info on secondary index
+				updates */
+	que_thr_t*	thr,	/*!< in: query thread */
+	trx_id_t	trx_id,	/*!< in: transaction id */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction; must be committed
+				before latching any further pages */
+	__attribute__((warn_unused_result, nonnull));
+/***********************************************************//**
+Marks a clustered index record deleted. Writes an undo log record to
+undo log on this delete marking. Writes in the trx id field the id
+of the deleting transaction, and in the roll ptr field pointer to the
+undo log record created.
+@return	DB_SUCCESS, DB_LOCK_WAIT, or error number */
+UNIV_INTERN
+dberr_t
+btr_cur_del_mark_set_clust_rec(
+/*===========================*/
+	buf_block_t*	block,	/*!< in/out: buffer block of the record */
+	rec_t*		rec,	/*!< in/out: record */
+	dict_index_t*	index,	/*!< in: clustered index of the record */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec) */
+	que_thr_t*	thr,	/*!< in: query thread */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+	__attribute__((nonnull, warn_unused_result));
+/***********************************************************//**
+Sets a secondary index record delete mark to TRUE or FALSE.
+@return	DB_SUCCESS, DB_LOCK_WAIT, or error number */
+UNIV_INTERN
+dberr_t
+btr_cur_del_mark_set_sec_rec(
+/*=========================*/
+	ulint		flags,	/*!< in: locking flag */
+	btr_cur_t*	cursor,	/*!< in: cursor */
+	ibool		val,	/*!< in: value to set */
+	que_thr_t*	thr,	/*!< in: query thread */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+	__attribute__((nonnull, warn_unused_result));
+/*************************************************************//**
+Tries to compress a page of the tree if it seems useful. It is assumed
+that mtr holds an x-latch on the tree and on the cursor page. To avoid
+deadlocks, mtr must also own x-latches to brothers of page, if those
+brothers exist. NOTE: it is assumed that the caller has reserved enough
+free extents so that the compression will always succeed if done!
+@return	TRUE if compression occurred */
+UNIV_INTERN
+ibool
+btr_cur_compress_if_useful(
+/*=======================*/
+	btr_cur_t*	cursor,	/*!< in/out: cursor on the page to compress;
+				cursor does not stay valid if compression
+				occurs */
+	ibool		adjust,	/*!< in: TRUE if should adjust the
+				cursor position even if compression occurs */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+	__attribute__((nonnull));
+/*******************************************************//**
+Removes the record on which the tree cursor is positioned. It is assumed
+that the mtr has an x-latch on the page where the cursor is positioned,
+but no latch on the whole tree.
+@return	TRUE if success, i.e., the page did not become too empty */
+UNIV_INTERN
+ibool
+btr_cur_optimistic_delete_func(
+/*===========================*/
+	btr_cur_t*	cursor,	/*!< in: cursor on the record to delete;
+				cursor stays valid: if deletion succeeds,
+				on function exit it points to the successor
+				of the deleted record */
+# ifdef UNIV_DEBUG
+	ulint		flags,	/*!< in: BTR_CREATE_FLAG or 0 */
+# endif /* UNIV_DEBUG */
+	mtr_t*		mtr)	/*!< in: mtr; if this function returns
+				TRUE on a leaf page of a secondary
+				index, the mtr must be committed
+				before latching any further pages */
+	__attribute__((nonnull, warn_unused_result));
+# ifdef UNIV_DEBUG
+#  define btr_cur_optimistic_delete(cursor, flags, mtr)		\
+	btr_cur_optimistic_delete_func(cursor, flags, mtr)
+# else /* UNIV_DEBUG */
+#  define btr_cur_optimistic_delete(cursor, flags, mtr)		\
+	btr_cur_optimistic_delete_func(cursor, mtr)
+# endif /* UNIV_DEBUG */
+/*************************************************************//**
+Removes the record on which the tree cursor is positioned. Tries
+to compress the page if its fillfactor drops below a threshold
+or if it is the only page on the level. It is assumed that mtr holds
+an x-latch on the tree and on the cursor page. To avoid deadlocks,
+mtr must also own x-latches to brothers of page, if those brothers
+exist.
+@return	TRUE if compression occurred */
+UNIV_INTERN
+ibool
+btr_cur_pessimistic_delete(
+/*=======================*/
+	dberr_t*		err,	/*!< out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
+				the latter may occur because we may have
+				to update node pointers on upper levels,
+				and in the case of variable length keys
+				these may actually grow in size */
+	ibool		has_reserved_extents, /*!< in: TRUE if the
+				caller has already reserved enough free
+				extents so that he knows that the operation
+				will succeed */
+	btr_cur_t*	cursor,	/*!< in: cursor on the record to delete;
+				if compression does not occur, the cursor
+				stays valid: it points to successor of
+				deleted record on function exit */
+	ulint		flags,	/*!< in: BTR_CREATE_FLAG or 0 */
+	enum trx_rb_ctx	rb_ctx,	/*!< in: rollback context */
+	mtr_t*		mtr)	/*!< in: mtr */
+	__attribute__((nonnull));
+#endif /* !UNIV_HOTBACKUP */
+/***********************************************************//**
+Parses a redo log record of updating a record in-place.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+btr_cur_parse_update_in_place(
+/*==========================*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	page_t*		page,	/*!< in/out: page or NULL */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	dict_index_t*	index);	/*!< in: index corresponding to page */
+/****************************************************************//**
+Parses the redo log record for delete marking or unmarking of a clustered
+index record.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+btr_cur_parse_del_mark_set_clust_rec(
+/*=================================*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	page_t*		page,	/*!< in/out: page or NULL */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	dict_index_t*	index);	/*!< in: index corresponding to page */
+/****************************************************************//**
+Parses the redo log record for delete marking or unmarking of a secondary
+index record.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+btr_cur_parse_del_mark_set_sec_rec(
+/*===============================*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	page_t*		page,	/*!< in/out: page or NULL */
+	page_zip_des_t*	page_zip);/*!< in/out: compressed page, or NULL */
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
+Estimates the number of rows in a given index range.
+@return	estimated number of rows */
+UNIV_INTERN
+ib_int64_t
+btr_estimate_n_rows_in_range(
+/*=========================*/
+	dict_index_t*	index,	/*!< in: index */
+	const dtuple_t*	tuple1,	/*!< in: range start, may also be empty tuple */
+	ulint		mode1,	/*!< in: search mode for range start */
+	const dtuple_t*	tuple2,	/*!< in: range end, may also be empty tuple */
+	ulint		mode2);	/*!< in: search mode for range end */
+/*******************************************************************//**
+Estimates the number of different key values in a given index, for
+each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index).
+The estimates are stored in the array index->stat_n_diff_key_vals[] (indexed
+0..n_uniq-1) and the number of pages that were sampled is saved in
+index->stat_n_sample_sizes[].
+If innodb_stats_method is nulls_ignored, we also record the number of
+non-null values for each prefix and store the estimates in the
+array index->stat_n_non_null_key_vals. */
+UNIV_INTERN
+void
+btr_estimate_number_of_different_key_vals(
+/*======================================*/
+	dict_index_t*	index);	/*!< in: index */
+
+/** Gets the externally stored size of a record, in units of a database page.
+@param[in]	rec	record
+@param[in]	offsets	array returned by rec_get_offsets()
+@return externally stored part, in units of a database page */
+
+ulint
+btr_rec_get_externally_stored_len(
+	const rec_t*	rec,
+	const ulint*	offsets);
+
+/*******************************************************************//**
+Marks non-updated off-page fields as disowned by this record. The ownership
+must be transferred to the updated record which is inserted elsewhere in the
+index tree. In purge only the owner of externally stored field is allowed
+to free the field. */
+UNIV_INTERN
+void
+btr_cur_disown_inherited_fields(
+/*============================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose uncompressed
+				part will be updated, or NULL */
+	rec_t*		rec,	/*!< in/out: record in a clustered index */
+	dict_index_t*	index,	/*!< in: index of the page */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	const upd_t*	update,	/*!< in: update vector */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+	__attribute__((nonnull(2,3,4,5,6)));
+
+/** Operation code for btr_store_big_rec_extern_fields(). */
+enum blob_op {
+	/** Store off-page columns for a freshly inserted record */
+	BTR_STORE_INSERT = 0,
+	/** Store off-page columns for an insert by update */
+	BTR_STORE_INSERT_UPDATE,
+	/** Store off-page columns for an update */
+	BTR_STORE_UPDATE
+};
+
+/*******************************************************************//**
+Determine if an operation on off-page columns is an update.
+@return TRUE if op != BTR_STORE_INSERT */
+UNIV_INLINE
+ibool
+btr_blob_op_is_update(
+/*==================*/
+	enum blob_op	op)	/*!< in: operation */
+	__attribute__((warn_unused_result));
+
+/*******************************************************************//**
+Stores the fields in big_rec_vec to the tablespace and puts pointers to
+them in rec.  The extern flags in rec will have to be set beforehand.
+The fields are stored on pages allocated from leaf node
+file segment of the index tree.
+@return	DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+UNIV_INTERN
+dberr_t
+btr_store_big_rec_extern_fields(
+/*============================*/
+	dict_index_t*	index,		/*!< in: index of rec; the index tree
+					MUST be X-latched */
+	buf_block_t*	rec_block,	/*!< in/out: block containing rec */
+	rec_t*		rec,		/*!< in/out: record */
+	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index);
+					the "external storage" flags in offsets
+					will not correspond to rec when
+					this function returns */
+	const big_rec_t*big_rec_vec,	/*!< in: vector containing fields
+					to be stored externally */
+	mtr_t*		btr_mtr,	/*!< in: mtr containing the
+					latches to the clustered index */
+	enum blob_op	op)		/*!< in: operation code */
+	__attribute__((nonnull, warn_unused_result));
+
+/*******************************************************************//**
+Frees the space in an externally stored field to the file space
+management if the field in data owns the externally stored field;
+in a rollback we may have the additional condition that the field must
+not be inherited. */
+UNIV_INTERN
+void
+btr_free_externally_stored_field(
+/*=============================*/
+	dict_index_t*	index,		/*!< in: index of the data, the index
+					tree MUST be X-latched; if the tree
+					height is 1, then also the root page
+					must be X-latched! (this is relevant
+					in the case this function is called
+					from purge where 'data' is located on
+					an undo log page, not an index
+					page) */
+	byte*		field_ref,	/*!< in/out: field reference */
+	const rec_t*	rec,		/*!< in: record containing field_ref, for
+					page_zip_write_blob_ptr(), or NULL */
+	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index),
+					or NULL */
+	page_zip_des_t*	page_zip,	/*!< in: compressed page corresponding
+					to rec, or NULL if rec == NULL */
+	ulint		i,		/*!< in: field number of field_ref;
+					ignored if rec == NULL */
+	enum trx_rb_ctx	rb_ctx,		/*!< in: rollback context */
+	mtr_t*		local_mtr);	/*!< in: mtr containing the latch to
+					data and an X-latch to the index
+					tree */
+/*******************************************************************//**
+Copies the prefix of an externally stored field of a record.  The
+clustered index record must be protected by a lock or a page latch.
+@return the length of the copied field, or 0 if the column was being
+or has been deleted */
+UNIV_INTERN
+ulint
+btr_copy_externally_stored_field_prefix(
+/*====================================*/
+	byte*		buf,	/*!< out: the field, or a prefix of it */
+	ulint		len,	/*!< in: length of buf, in bytes */
+	ulint		zip_size,/*!< in: nonzero=compressed BLOB page size,
+				zero for uncompressed BLOBs */
+	const byte*	data,	/*!< in: 'internally' stored part of the
+				field containing also the reference to
+				the external part; must be protected by
+				a lock or a page latch */
+	ulint		local_len);/*!< in: length of data, in bytes */
+/*******************************************************************//**
+Copies an externally stored field of a record to mem heap.  The
+clustered index record must be protected by a lock or a page latch.
+@return the whole field copied to heap */
+UNIV_INTERN
+byte*
+btr_copy_externally_stored_field(
+/*=============================*/
+	ulint*		len,	/*!< out: length of the whole field */
+	const byte*	data,	/*!< in: 'internally' stored part of the
+				field containing also the reference to
+				the external part; must be protected by
+				a lock or a page latch */
+	ulint		zip_size,/*!< in: nonzero=compressed BLOB page size,
+				zero for uncompressed BLOBs */
+	ulint		local_len,/*!< in: length of data */
+	mem_heap_t*	heap);	/*!< in: mem heap */
+/*******************************************************************//**
+Copies an externally stored field of a record to mem heap.
+@return	the field copied to heap, or NULL if the field is incomplete */
+UNIV_INTERN
+byte*
+btr_rec_copy_externally_stored_field(
+/*=================================*/
+	const rec_t*	rec,	/*!< in: record in a clustered index;
+				must be protected by a lock or a page latch */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint		zip_size,/*!< in: nonzero=compressed BLOB page size,
+				zero for uncompressed BLOBs */
+	ulint		no,	/*!< in: field number */
+	ulint*		len,	/*!< out: length of the field */
+	mem_heap_t*	heap);	/*!< in: mem heap */
+/*******************************************************************//**
+Flags the data tuple fields that are marked as extern storage in the
+update vector.  We use this function to remember which fields we must
+mark as extern storage in a record inserted for an update.
+@return	number of flagged external columns */
+UNIV_INTERN
+ulint
+btr_push_update_extern_fields(
+/*==========================*/
+	dtuple_t*	tuple,	/*!< in/out: data tuple */
+	const upd_t*	update,	/*!< in: update vector */
+	mem_heap_t*	heap)	/*!< in: memory heap */
+	__attribute__((nonnull));
+/***********************************************************//**
+Sets a secondary index record's delete mark to the given value. This
+function is only used by the insert buffer merge mechanism. */
+UNIV_INTERN
+void
+btr_cur_set_deleted_flag_for_ibuf(
+/*==============================*/
+	rec_t*		rec,		/*!< in/out: record */
+	page_zip_des_t*	page_zip,	/*!< in/out: compressed page
+					corresponding to rec, or NULL
+					when the tablespace is
+					uncompressed */
+	ibool		val,		/*!< in: value to set */
+	mtr_t*		mtr);		/*!< in/out: mini-transaction */
+/*######################################################################*/
+
+/** In the pessimistic delete, if the page data size drops below this
+limit, merging it to a neighbor is tried */
+#define BTR_CUR_PAGE_COMPRESS_LIMIT	(UNIV_PAGE_SIZE / 2)
+
+/** A slot in the path array. We store here info on a search path down the
+tree. Each slot contains data on a single level of the tree. */
+
+struct btr_path_t{
+	ulint	nth_rec;	/*!< index of the record
+				where the page cursor stopped on
+				this level (index in alphabetical
+				order); value ULINT_UNDEFINED
+				denotes array end */
+	ulint	n_recs;		/*!< number of records on the page */
+	ulint	page_no;	/*!< no of the page containing the record */
+	ulint	page_level;	/*!< level of the page; if later we fetch
+				the page under page_no and it is on a
+				different level, then we know that the
+				tree has been reorganized */
+};
+
+#define BTR_PATH_ARRAY_N_SLOTS	250	/*!< size of path array (in slots) */
+
+/** Values for the flag documenting the used search method */
+enum btr_cur_method {
+	BTR_CUR_HASH = 1,	/*!< successful shortcut using
+				the hash index */
+	BTR_CUR_HASH_FAIL,	/*!< failure using hash, success using
+				binary search: the misleading hash
+				reference is stored in the field
+				hash_node, and might be necessary to
+				update */
+	BTR_CUR_BINARY,		/*!< success using the binary search */
+	BTR_CUR_INSERT_TO_IBUF,	/*!< performed the intended insert to
+				the insert buffer */
+	BTR_CUR_DEL_MARK_IBUF,	/*!< performed the intended delete
+				mark in the insert/delete buffer */
+	BTR_CUR_DELETE_IBUF,	/*!< performed the intended delete in
+				the insert/delete buffer */
+	BTR_CUR_DELETE_REF	/*!< row_purge_poss_sec() failed */
+};
+
+/** The tree cursor: the definition appears here only for the compiler
+to know struct size! */
+struct btr_cur_t {
+	dict_index_t*	index;		/*!< index where positioned */
+	page_cur_t	page_cur;	/*!< page cursor */
+	purge_node_t*	purge_node;	/*!< purge node, for BTR_DELETE */
+	buf_block_t*	left_block;	/*!< this field is used to store
+					a pointer to the left neighbor
+					page, in the cases
+					BTR_SEARCH_PREV and
+					BTR_MODIFY_PREV */
+	/*------------------------------*/
+	que_thr_t*	thr;		/*!< this field is only used
+					when btr_cur_search_to_nth_level
+					is called for an index entry
+					insertion: the calling query
+					thread is passed here to be
+					used in the insert buffer */
+	/*------------------------------*/
+	/** The following fields are used in
+	btr_cur_search_to_nth_level to pass information: */
+	/* @{ */
+	enum btr_cur_method	flag;	/*!< Search method used */
+	ulint		tree_height;	/*!< Tree height if the search is done
+					for a pessimistic insert or update
+					operation */
+	ulint		up_match;	/*!< If the search mode was PAGE_CUR_LE,
+					the number of matched fields to the
+					first user record to the right of
+					the cursor record after
+					btr_cur_search_to_nth_level;
+					for the mode PAGE_CUR_GE, the matched
+					fields to the first user record AT THE
+					CURSOR or to the right of it;
+					NOTE that the up_match and low_match
+					values may exceed the correct values
+					for comparison to the adjacent user
+					record if that record is on a
+					different leaf page! (See the note in
+					row_ins_duplicate_error_in_clust.) */
+	ulint		up_bytes;	/*!< number of matched bytes to the
+					right at the time cursor positioned;
+					only used internally in searches: not
+					defined after the search */
+	ulint		low_match;	/*!< if search mode was PAGE_CUR_LE,
+					the number of matched fields to the
+					first user record AT THE CURSOR or
+					to the left of it after
+					btr_cur_search_to_nth_level;
+					NOT defined for PAGE_CUR_GE or any
+					other search modes; see also the NOTE
+					in up_match! */
+	ulint		low_bytes;	/*!< number of matched bytes to the
+					right at the time cursor positioned;
+					only used internally in searches: not
+					defined after the search */
+	ulint		n_fields;	/*!< prefix length used in a hash
+					search if hash_node != NULL */
+	ulint		n_bytes;	/*!< hash prefix bytes if hash_node !=
+					NULL */
+	ulint		fold;		/*!< fold value used in the search if
+					flag is BTR_CUR_HASH */
+	/* @} */
+	btr_path_t*	path_arr;	/*!< in estimating the number of
+					rows in range, we store in this array
+					information of the path through
+					the tree */
+};
+
+/** If pessimistic delete fails because of lack of file space, there
+is still a good chance of success a little later.  Try this many
+times. */
+#define BTR_CUR_RETRY_DELETE_N_TIMES	100
+/** If pessimistic delete fails because of lack of file space, there
+is still a good chance of success a little later.  Sleep this many
+microseconds between retries. */
+#define BTR_CUR_RETRY_SLEEP_TIME	50000
+
+/** The reference in a field for which data is stored on a different page.
+The reference is at the end of the 'locally' stored part of the field.
+'Locally' means storage in the index record.
+We store locally a long enough prefix of each column so that we can determine
+the ordering parts of each index record without looking into the externally
+stored part. */
+/*-------------------------------------- @{ */
+#define BTR_EXTERN_SPACE_ID		0	/*!< space id where stored */
+#define BTR_EXTERN_PAGE_NO		4	/*!< page no where stored */
+#define BTR_EXTERN_OFFSET		8	/*!< offset of BLOB header
+						on that page */
+#define BTR_EXTERN_LEN			12	/*!< 8 bytes containing the
+						length of the externally
+						stored part of the BLOB.
+						The 2 highest bits are
+						reserved to the flags below. */
+/*-------------------------------------- @} */
+/* #define BTR_EXTERN_FIELD_REF_SIZE	20 // moved to btr0types.h */
+
+/** The most significant bit of BTR_EXTERN_LEN (i.e., the most
+significant bit of the byte at smallest address) is set to 1 if this
+field does not 'own' the externally stored field; only the owner field
+is allowed to free the field in purge! */
+#define BTR_EXTERN_OWNER_FLAG		128
+/** If the second most significant bit of BTR_EXTERN_LEN (i.e., the
+second most significant bit of the byte at smallest address) is 1 then
+it means that the externally stored field was inherited from an
+earlier version of the row.  In rollback we are not allowed to free an
+inherited external field. */
+#define BTR_EXTERN_INHERITED_FLAG	64
+
+/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */
+extern ulint	btr_cur_n_non_sea;
+/** Number of successful adaptive hash index lookups in
+btr_cur_search_to_nth_level(). */
+extern ulint	btr_cur_n_sea;
+/** Old value of btr_cur_n_non_sea.  Copied by
+srv_refresh_innodb_monitor_stats().  Referenced by
+srv_printf_innodb_monitor(). */
+extern ulint	btr_cur_n_non_sea_old;
+/** Old value of btr_cur_n_sea.  Copied by
+srv_refresh_innodb_monitor_stats().  Referenced by
+srv_printf_innodb_monitor(). */
+extern ulint	btr_cur_n_sea_old;
+#endif /* !UNIV_HOTBACKUP */
+
+#ifdef UNIV_DEBUG
+/* Flag to limit optimistic insert records */
+extern uint	btr_cur_limit_optimistic_insert_debug;
+#endif /* UNIV_DEBUG */
+
+#ifndef UNIV_NONINL
+#include "btr0cur.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/btr0cur.ic b/storage/innobase/include/btr0cur.ic
new file mode 100644
index 00000000000..43ee3304c0e
--- /dev/null
+++ b/storage/innobase/include/btr0cur.ic
@@ -0,0 +1,223 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/btr0cur.ic
+The index tree cursor
+
+Created 10/16/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef UNIV_HOTBACKUP
+#include "btr0btr.h"
+
+#ifdef UNIV_DEBUG /* runs CODE if the debug limit is > 1 and NREC >= it */
+# define LIMIT_OPTIMISTIC_INSERT_DEBUG(NREC, CODE)\
+if (btr_cur_limit_optimistic_insert_debug > 1\
+    && (NREC) >= (ulint)btr_cur_limit_optimistic_insert_debug) {\
+        CODE;\
+}
+#else /* !UNIV_DEBUG: the macro expands to nothing */
+# define LIMIT_OPTIMISTIC_INSERT_DEBUG(NREC, CODE)
+#endif /* UNIV_DEBUG */
+
+#ifdef UNIV_DEBUG
+/*********************************************************//**
+Returns the page cursor component of a tree cursor (UNIV_DEBUG variant).
+@return	non-const pointer to cursor->page_cur; constness is cast away */
+UNIV_INLINE
+page_cur_t*
+btr_cur_get_page_cur(
+/*=================*/
+	const btr_cur_t*	cursor)	/*!< in: tree cursor */
+{
+	return(&((btr_cur_t*) cursor)->page_cur);
+}
+
+/*********************************************************//**
+Returns the buffer block on which the tree cursor is positioned (UNIV_DEBUG).
+@return	pointer to buffer block, from page_cur_get_block() on the page cursor */
+UNIV_INLINE
+buf_block_t*
+btr_cur_get_block(
+/*==============*/
+	const btr_cur_t*	cursor)	/*!< in: tree cursor */
+{
+	return(page_cur_get_block(btr_cur_get_page_cur(cursor)));
+}
+
+/*********************************************************//**
+Returns the record pointer of a tree cursor (UNIV_DEBUG variant).
+@return	pointer to record, from page_cur_get_rec() on the page cursor */
+UNIV_INLINE
+rec_t*
+btr_cur_get_rec(
+/*============*/
+	const btr_cur_t*	cursor)	/*!< in: tree cursor */
+{
+	return(page_cur_get_rec(btr_cur_get_page_cur(cursor)));
+}
+#endif /* UNIV_DEBUG */
+
+/*********************************************************//**
+Returns the compressed page of the block the tree cursor is positioned on.
+@return	buf_block_get_page_zip() of that block; NULL if not compressed */
+UNIV_INLINE
+page_zip_des_t*
+btr_cur_get_page_zip(
+/*=================*/
+	btr_cur_t*	cursor)	/*!< in: tree cursor */
+{
+	return(buf_block_get_page_zip(btr_cur_get_block(cursor)));
+}
+
+/*********************************************************//**
+Invalidates a tree cursor: its page cursor's record pointer is set to NULL. */
+UNIV_INLINE
+void
+btr_cur_invalidate(
+/*===============*/
+	btr_cur_t*	cursor)	/*!< in/out: tree cursor */
+{
+	page_cur_invalidate(&(cursor->page_cur));
+}
+
+/*********************************************************//**
+Returns the page of a tree cursor, found by page-aligning its record pointer.
+@return	pointer to page */
+UNIV_INLINE
+page_t*
+btr_cur_get_page(
+/*=============*/
+	btr_cur_t*	cursor)	/*!< in: tree cursor */
+{
+	return(page_align(page_cur_get_rec(&(cursor->page_cur))));
+}
+
+/*********************************************************//**
+Positions a tree cursor on a record and binds the cursor to an index. */
+UNIV_INLINE
+void
+btr_cur_position(
+/*=============*/
+	dict_index_t*	index,	/*!< in: index */
+	rec_t*		rec,	/*!< in: record in tree */
+	buf_block_t*	block,	/*!< in: buffer block of rec */
+	btr_cur_t*	cursor)	/*!< out: cursor */
+{
+	ut_ad(page_align(rec) == block->frame);
+
+	cursor->index = index;
+
+	page_cur_position(rec, block, &cursor->page_cur);
+}
+
+/*********************************************************************//**
+Decides whether it is worth trying to compress the index page on which a
+btr cursor is placed. Compression is never recommended for the root page.
+@return	TRUE if compression is recommended */
+UNIV_INLINE
+ibool
+btr_cur_compress_recommendation(
+/*============================*/
+	btr_cur_t*	cursor,	/*!< in: btr cursor */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
+				MTR_MEMO_PAGE_X_FIX));
+
+	const page_t*	page = btr_cur_get_page(cursor);
+
+	LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page) * 2,
+				      return(FALSE));
+
+	/* Guard: the page is still filled to at least the predefined
+	minimum AND it has a sibling on its level, so compression is
+	not recommended. */
+	if ((page_get_data_size(page) >= BTR_CUR_PAGE_COMPRESS_LIMIT)
+	    && ((btr_page_get_next(page, mtr) != FIL_NULL)
+		|| (btr_page_get_prev(page, mtr) != FIL_NULL))) {
+
+		return(FALSE);
+	}
+
+	/* Either the fill factor dropped below the minimum or this is
+	the only page on its level: recommend compression unless this
+	is the root page of the index. */
+
+	return(dict_index_get_page(cursor->index)
+	       != page_get_page_no(page));
+}
+
+/*********************************************************************//**
+Tells whether deleting the record under the cursor would leave the page in
+a state where tree compression becomes necessary (or, recommended).
+@return	TRUE if can be deleted without recommended compression */
+UNIV_INLINE
+ibool
+btr_cur_can_delete_without_compress(
+/*================================*/
+	btr_cur_t*	cursor,	/*!< in: btr cursor */
+	ulint		rec_size,/*!< in: rec_get_size(btr_cur_get_rec(cursor))*/
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
+				MTR_MEMO_PAGE_X_FIX));
+
+	page_t*		page = btr_cur_get_page(cursor);
+
+	/* Guard: after the delete the page stays filled to at least the
+	predefined minimum, it has a sibling on its level, and it holds
+	at least two records now (so it will not become empty). */
+	if ((page_get_data_size(page) - rec_size >= BTR_CUR_PAGE_COMPRESS_LIMIT)
+	    && ((btr_page_get_next(page, mtr) != FIL_NULL)
+		|| (btr_page_get_prev(page, mtr) != FIL_NULL))
+	    && (page_get_n_recs(page) >= 2)) {
+
+		return(TRUE);
+	}
+
+	/* Compression would be recommended for any page but the root,
+	so the delete needs no compression only when the cursor is on
+	the root page of the index. */
+
+	return(dict_index_get_page(cursor->index)
+	       == page_get_page_no(page));
+}
+
+/*******************************************************************//**
+Tells whether an operation on off-page columns counts as an update.
+@return TRUE if op != BTR_STORE_INSERT */
+UNIV_INLINE
+ibool
+btr_blob_op_is_update(
+/*==================*/
+	enum blob_op	op)	/*!< in: operation */
+{
+	switch (op) {
+	case BTR_STORE_UPDATE:
+	case BTR_STORE_INSERT_UPDATE:
+		return(TRUE);
+	case BTR_STORE_INSERT:
+		return(FALSE);
+	}
+	/* Only reached when op matches none of the cases above. */
+	ut_ad(0);
+	return(FALSE);
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/btr0pcur.h b/storage/innobase/include/btr0pcur.h
new file mode 100644
index 00000000000..cfbaacf4de3
--- /dev/null
+++ b/storage/innobase/include/btr0pcur.h
@@ -0,0 +1,548 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/btr0pcur.h
+The index tree persistent cursor
+
+Created 2/23/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef btr0pcur_h
+#define btr0pcur_h
+
+#include "univ.i"
+#include "dict0dict.h"
+#include "data0data.h"
+#include "mtr0mtr.h"
+#include "page0cur.h"
+#include "btr0cur.h"
+#include "btr0btr.h"
+#include "btr0types.h"
+
+/* Relative positions for a stored cursor position */
+#define BTR_PCUR_ON			1
+#define BTR_PCUR_BEFORE			2
+#define BTR_PCUR_AFTER			3
+/* Note that if the tree is not empty, btr_pcur_store_position does not
+use the following, but only uses the above three alternatives, where the
+position is stored relative to a specific record: this makes implementation
+of a scroll cursor easier */
+#define BTR_PCUR_BEFORE_FIRST_IN_TREE	4	/* in an empty tree */
+#define BTR_PCUR_AFTER_LAST_IN_TREE	5	/* in an empty tree */
+
+/**************************************************************//**
+Allocates memory for a persistent cursor object and initializes the cursor.
+@return	own: persistent cursor */
+UNIV_INTERN
+btr_pcur_t*
+btr_pcur_create_for_mysql(void);
+/*============================*/
+
+/**************************************************************//**
+Resets a persistent cursor object, freeing ::old_rec_buf if it is
+allocated and resetting the other members to their initial values. */
+UNIV_INTERN
+void
+btr_pcur_reset(
+/*===========*/
+	btr_pcur_t*	cursor);/*!< in, out: persistent cursor */
+
+/**************************************************************//**
+Frees the memory for a persistent cursor object. */
+UNIV_INTERN
+void
+btr_pcur_free_for_mysql(
+/*====================*/
+	btr_pcur_t*	cursor);	/*!< in, own: persistent cursor */
+/**************************************************************//**
+Copies the stored position of a pcur to another pcur. */
+UNIV_INTERN
+void
+btr_pcur_copy_stored_position(
+/*==========================*/
+	btr_pcur_t*	pcur_receive,	/*!< in: pcur which will receive the
+					position info */
+	btr_pcur_t*	pcur_donate);	/*!< in: pcur from which the info is
+					copied */
+/**************************************************************//**
+Sets the old_rec_buf field to NULL. */
+UNIV_INLINE
+void
+btr_pcur_init(
+/*==========*/
+	btr_pcur_t*	pcur);	/*!< in: persistent cursor */
+/**************************************************************//**
+Initializes and opens a persistent cursor to an index tree. It should be
+closed with btr_pcur_close. */
+UNIV_INLINE
+void
+btr_pcur_open_low(
+/*==============*/
+	dict_index_t*	index,	/*!< in: index */
+	ulint		level,	/*!< in: level in the btree */
+	const dtuple_t*	tuple,	/*!< in: tuple on which search done */
+	ulint		mode,	/*!< in: PAGE_CUR_L, ...;
+				NOTE that if the search is made using a unique
+				prefix of a record, mode should be
+				PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
+				may end up on the previous page from the
+				record! */
+	ulint		latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
+	btr_pcur_t*	cursor, /*!< in: memory buffer for persistent cursor */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
+	mtr_t*		mtr);	/*!< in: mtr */
+#define btr_pcur_open(i,t,md,l,c,m)				\
+	btr_pcur_open_low(i,0,t,md,l,c,__FILE__,__LINE__,m)
+/**************************************************************//**
+Opens a persistent cursor to an index tree without initializing the
+cursor. */
+UNIV_INLINE
+void
+btr_pcur_open_with_no_init_func(
+/*============================*/
+	dict_index_t*	index,	/*!< in: index */
+	const dtuple_t*	tuple,	/*!< in: tuple on which search done */
+	ulint		mode,	/*!< in: PAGE_CUR_L, ...;
+				NOTE that if the search is made using a unique
+				prefix of a record, mode should be
+				PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
+				may end up on the previous page of the
+				record! */
+	ulint		latch_mode,/*!< in: BTR_SEARCH_LEAF, ...;
+				NOTE that if has_search_latch != 0 then
+				we maybe do not acquire a latch on the cursor
+				page, but assume that the caller uses his
+				btr search latch to protect the record! */
+	btr_pcur_t*	cursor, /*!< in: memory buffer for persistent cursor */
+	ulint		has_search_latch,/*!< in: latch mode the caller
+				currently has on btr_search_latch:
+				RW_S_LATCH, or 0 */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
+	mtr_t*		mtr);	/*!< in: mtr */
+#define btr_pcur_open_with_no_init(ix,t,md,l,cur,has,m)			\
+	btr_pcur_open_with_no_init_func(ix,t,md,l,cur,has,__FILE__,__LINE__,m)
+
+/*****************************************************************//**
+Opens a persistent cursor at either end of an index. */
+UNIV_INLINE
+void
+btr_pcur_open_at_index_side(
+/*========================*/
+	bool		from_left,	/*!< in: true if open to the low end,
+					false if to the high end */
+	dict_index_t*	index,		/*!< in: index */
+	ulint		latch_mode,	/*!< in: latch mode */
+	btr_pcur_t*	pcur,		/*!< in/out: cursor */
+	bool		init_pcur,	/*!< in: whether to initialize pcur */
+	ulint		level,		/*!< in: level to search for
+					(0=leaf) */
+	mtr_t*		mtr)		/*!< in/out: mini-transaction */
+	__attribute__((nonnull));
+/**************************************************************//**
+Gets the up_match value for a pcur after a search.
+@return number of matched fields at the cursor or to the right if
+search mode was PAGE_CUR_GE, otherwise undefined */
+UNIV_INLINE
+ulint
+btr_pcur_get_up_match(
+/*==================*/
+	const btr_pcur_t*	cursor); /*!< in: persistent cursor */
+/**************************************************************//**
+Gets the low_match value for a pcur after a search.
+@return number of matched fields at the cursor or to the left if
+search mode was PAGE_CUR_LE, otherwise undefined */
+UNIV_INLINE
+ulint
+btr_pcur_get_low_match(
+/*===================*/
+	const btr_pcur_t*	cursor); /*!< in: persistent cursor */
+/**************************************************************//**
+If mode is PAGE_CUR_G or PAGE_CUR_GE, opens a persistent cursor on the first
+user record satisfying the search condition, in the case PAGE_CUR_L or
+PAGE_CUR_LE, on the last user record. If no such user record exists, then
+in the first case sets the cursor after last in tree, and in the latter case
+before first in tree. The latching mode must be BTR_SEARCH_LEAF or
+BTR_MODIFY_LEAF. */
+UNIV_INTERN
+void
+btr_pcur_open_on_user_rec_func(
+/*===========================*/
+	dict_index_t*	index,		/*!< in: index */
+	const dtuple_t*	tuple,		/*!< in: tuple on which search done */
+	ulint		mode,		/*!< in: PAGE_CUR_L, ... */
+	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF or
+					BTR_MODIFY_LEAF */
+	btr_pcur_t*	cursor,		/*!< in: memory buffer for persistent
+					cursor */
+	const char*	file,		/*!< in: file name */
+	ulint		line,		/*!< in: line where called */
+	mtr_t*		mtr);		/*!< in: mtr */
+#define btr_pcur_open_on_user_rec(i,t,md,l,c,m)				\
+	btr_pcur_open_on_user_rec_func(i,t,md,l,c,__FILE__,__LINE__,m)
+/**********************************************************************//**
+Positions a cursor at a randomly chosen position within a B-tree. */
+UNIV_INLINE
+void
+btr_pcur_open_at_rnd_pos_func(
+/*==========================*/
+	dict_index_t*	index,		/*!< in: index */
+	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ... */
+	btr_pcur_t*	cursor,		/*!< in/out: B-tree pcur */
+	const char*	file,		/*!< in: file name */
+	ulint		line,		/*!< in: line where called */
+	mtr_t*		mtr);		/*!< in: mtr */
+#define btr_pcur_open_at_rnd_pos(i,l,c,m)				\
+	btr_pcur_open_at_rnd_pos_func(i,l,c,__FILE__,__LINE__,m)
+/**************************************************************//**
+Frees the possible memory heap of a persistent cursor and sets the latch
+mode of the persistent cursor to BTR_NO_LATCHES.
+WARNING: this function does not release the latch on the page where the
+cursor is currently positioned. The latch is acquired by the
+"move to next/previous" family of functions. Since recursive shared locks
+are not allowed, you must take care (if using the cursor in S-mode) to
+manually release the latch by either calling
+btr_leaf_page_release(btr_pcur_get_block(&pcur), pcur.latch_mode, mtr)
+or by committing the mini-transaction right after btr_pcur_close().
+A subsequent attempt to crawl the same page in the same mtr would cause
+an assertion failure. */
+UNIV_INLINE
+void
+btr_pcur_close(
+/*===========*/
+	btr_pcur_t*	cursor);	/*!< in: persistent cursor */
+/**************************************************************//**
+The position of the cursor is stored by taking an initial segment of the
+record the cursor is positioned on, before, or after, and copying it to the
+cursor data structure, or just setting a flag if the cursor is before the
+first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the
+page where the cursor is positioned must not be empty if the index tree is
+not totally empty! */
+UNIV_INTERN
+void
+btr_pcur_store_position(
+/*====================*/
+	btr_pcur_t*	cursor, /*!< in: persistent cursor */
+	mtr_t*		mtr);	/*!< in: mtr */
+/**************************************************************//**
+Restores the stored position of a persistent cursor bufferfixing the page and
+obtaining the specified latches. If the cursor position was saved when the
+(1) cursor was positioned on a user record: this function restores the position
+to the last record LESS OR EQUAL to the stored record;
+(2) cursor was positioned on a page infimum record: restores the position to
+the last record LESS than the user record which was the successor of the page
+infimum;
+(3) cursor was positioned on the page supremum: restores to the first record
+GREATER than the user record which was the predecessor of the supremum.
+(4) cursor was positioned before the first or after the last in an empty tree:
+restores to before first or after the last in the tree.
+@return TRUE if the cursor position was stored when it was on a user
+record and it can be restored on a user record whose ordering fields
+are identical to the ones of the original user record */
+UNIV_INTERN
+ibool
+btr_pcur_restore_position_func(
+/*===========================*/
+	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ... */
+	btr_pcur_t*	cursor,		/*!< in: detached persistent cursor */
+	const char*	file,		/*!< in: file name */
+	ulint		line,		/*!< in: line where called */
+	mtr_t*		mtr);		/*!< in: mtr */
+#define btr_pcur_restore_position(l,cur,mtr)				\
+	btr_pcur_restore_position_func(l,cur,__FILE__,__LINE__,mtr)
+/*********************************************************//**
+Gets the rel_pos field for a cursor whose position has been stored.
+@return	BTR_PCUR_ON, ... */
+UNIV_INLINE
+ulint
+btr_pcur_get_rel_pos(
+/*=================*/
+	const btr_pcur_t*	cursor);/*!< in: persistent cursor */
+/**************************************************************//**
+Commits the mtr and sets the pcur latch mode to BTR_NO_LATCHES,
+that is, the cursor becomes detached.
+Function btr_pcur_store_position should be used before calling this,
+if restoration of cursor is wanted later. */
+UNIV_INLINE
+void
+btr_pcur_commit_specify_mtr(
+/*========================*/
+	btr_pcur_t*	pcur,	/*!< in: persistent cursor */
+	mtr_t*		mtr);	/*!< in: mtr to commit */
+/*********************************************************//**
+Moves the persistent cursor to the next record in the tree. If no records are
+left, the cursor stays 'after last in tree'.
+@return	TRUE if the cursor was not after last in tree */
+UNIV_INLINE
+ibool
+btr_pcur_move_to_next(
+/*==================*/
+	btr_pcur_t*	cursor,	/*!< in: persistent cursor; NOTE that the
+				function may release the page latch */
+	mtr_t*		mtr);	/*!< in: mtr */
+/*********************************************************//**
+Moves the persistent cursor to the previous record in the tree. If no records
+are left, the cursor stays 'before first in tree'.
+@return	TRUE if the cursor was not before first in tree */
+UNIV_INTERN
+ibool
+btr_pcur_move_to_prev(
+/*==================*/
+	btr_pcur_t*	cursor,	/*!< in: persistent cursor; NOTE that the
+				function may release the page latch */
+	mtr_t*		mtr);	/*!< in: mtr */
+/*********************************************************//**
+Moves the persistent cursor to the last record on the same page. */
+UNIV_INLINE
+void
+btr_pcur_move_to_last_on_page(
+/*==========================*/
+	btr_pcur_t*	cursor,	/*!< in: persistent cursor */
+	mtr_t*		mtr);	/*!< in: mtr */
+/*********************************************************//**
+Moves the persistent cursor to the next user record in the tree. If no user
+records are left, the cursor ends up 'after last in tree'.
+@return	TRUE if the cursor moved forward, ending on a user record */
+UNIV_INLINE
+ibool
+btr_pcur_move_to_next_user_rec(
+/*===========================*/
+	btr_pcur_t*	cursor,	/*!< in: persistent cursor; NOTE that the
+				function may release the page latch */
+	mtr_t*		mtr);	/*!< in: mtr */
+/*********************************************************//**
+Moves the persistent cursor to the first record on the next page.
+Releases the latch on the current page, and bufferunfixes it.
+Note that there must not be modifications on the current page,
+as then the x-latch can be released only in mtr_commit. */
+UNIV_INTERN
+void
+btr_pcur_move_to_next_page(
+/*=======================*/
+	btr_pcur_t*	cursor,	/*!< in: persistent cursor; must be on the
+				last record of the current page */
+	mtr_t*		mtr);	/*!< in: mtr */
+/*********************************************************//**
+Moves the persistent cursor backward if it is on the first record
+of the page. Releases the latch on the current page, and bufferunfixes
+it. Note that to prevent a possible deadlock, the operation first
+stores the position of the cursor, releases the leaf latch, acquires
+necessary latches and restores the cursor position again before returning.
+The alphabetical position of the cursor is guaranteed to be sensible
+on return, but it may happen that the cursor is not positioned on the
+last record of any page, because the structure of the tree may have
+changed while the cursor had no latches. */
+UNIV_INTERN
+void
+btr_pcur_move_backward_from_page(
+/*=============================*/
+	btr_pcur_t*	cursor,	/*!< in: persistent cursor, must be on the
+				first record of the current page */
+	mtr_t*		mtr);	/*!< in: mtr */
+#ifdef UNIV_DEBUG
+/*********************************************************//**
+Returns the btr cursor component of a persistent cursor.
+@return	pointer to btr cursor component */
+UNIV_INLINE
+btr_cur_t*
+btr_pcur_get_btr_cur(
+/*=================*/
+	const btr_pcur_t*	cursor);	/*!< in: persistent cursor */
+/*********************************************************//**
+Returns the page cursor component of a persistent cursor.
+@return	pointer to page cursor component */
+UNIV_INLINE
+page_cur_t*
+btr_pcur_get_page_cur(
+/*==================*/
+	const btr_pcur_t*	cursor);	/*!< in: persistent cursor */
+/*********************************************************//**
+Returns the page of a persistent cursor.
+@return	pointer to the page */
+UNIV_INLINE
+page_t*
+btr_pcur_get_page(
+/*==============*/
+	const btr_pcur_t*	cursor);/*!< in: persistent cursor */
+/*********************************************************//**
+Returns the buffer block of a persistent cursor.
+@return	pointer to the block */
+UNIV_INLINE
+buf_block_t*
+btr_pcur_get_block(
+/*===============*/
+	const btr_pcur_t*	cursor);/*!< in: persistent cursor */
+/*********************************************************//**
+Returns the record of a persistent cursor.
+@return	pointer to the record */
+UNIV_INLINE
+rec_t*
+btr_pcur_get_rec(
+/*=============*/
+	const btr_pcur_t*	cursor);/*!< in: persistent cursor */
+#else /* UNIV_DEBUG */
+# define btr_pcur_get_btr_cur(cursor) (&(cursor)->btr_cur)
+# define btr_pcur_get_page_cur(cursor) (&(cursor)->btr_cur.page_cur)
+# define btr_pcur_get_page(cursor) ((cursor)->btr_cur.page_cur.block->frame)
+# define btr_pcur_get_block(cursor) ((cursor)->btr_cur.page_cur.block)
+# define btr_pcur_get_rec(cursor) ((cursor)->btr_cur.page_cur.rec)
+#endif /* UNIV_DEBUG */
+/*********************************************************//**
+Checks if the persistent cursor is on a user record. */
+UNIV_INLINE
+ibool
+btr_pcur_is_on_user_rec(
+/*====================*/
+	const btr_pcur_t*	cursor);/*!< in: persistent cursor */
+/*********************************************************//**
+Checks if the persistent cursor is after the last user record on
+a page. */
+UNIV_INLINE
+ibool
+btr_pcur_is_after_last_on_page(
+/*===========================*/
+	const btr_pcur_t*	cursor);/*!< in: persistent cursor */
+/*********************************************************//**
+Checks if the persistent cursor is before the first user record on
+a page. */
+UNIV_INLINE
+ibool
+btr_pcur_is_before_first_on_page(
+/*=============================*/
+	const btr_pcur_t*	cursor);/*!< in: persistent cursor */
+/*********************************************************//**
+Checks if the persistent cursor is before the first user record in
+the index tree. */
+UNIV_INLINE
+ibool
+btr_pcur_is_before_first_in_tree(
+/*=============================*/
+	btr_pcur_t*	cursor,	/*!< in: persistent cursor */
+	mtr_t*		mtr);	/*!< in: mtr */
+/*********************************************************//**
+Checks if the persistent cursor is after the last user record in
+the index tree. */
+UNIV_INLINE
+ibool
+btr_pcur_is_after_last_in_tree(
+/*===========================*/
+	btr_pcur_t*	cursor,	/*!< in: persistent cursor */
+	mtr_t*		mtr);	/*!< in: mtr */
+/*********************************************************//**
+Moves the persistent cursor to the next record on the same page. */
+UNIV_INLINE
+void
+btr_pcur_move_to_next_on_page(
+/*==========================*/
+	btr_pcur_t*	cursor);/*!< in/out: persistent cursor */
+/*********************************************************//**
+Moves the persistent cursor to the previous record on the same page. */
+UNIV_INLINE
+void
+btr_pcur_move_to_prev_on_page(
+/*==========================*/
+	btr_pcur_t*	cursor);/*!< in/out: persistent cursor */
+/*********************************************************//**
+Moves the persistent cursor to the infimum record on the same page. */
+UNIV_INLINE
+void
+btr_pcur_move_before_first_on_page(
+/*===============================*/
+	btr_pcur_t*	cursor); /*!< in/out: persistent cursor */
+
+/** Position state of a persistent B-tree cursor (btr_pcur_t::pos_state). */
+enum pcur_pos_t {
+	/** The persistent cursor is not positioned. */
+	BTR_PCUR_NOT_POSITIONED = 0,
+	/** The persistent cursor was previously positioned.
+	TODO: currently, the state can be BTR_PCUR_IS_POSITIONED,
+	though it really should be BTR_PCUR_WAS_POSITIONED,
+	because we have no obligation to commit the cursor with
+	mtr; similarly latch_mode may be out of date. This can
+	lead to problems if btr_pcur is not used the right way;
+	all current code should be ok. */
+	BTR_PCUR_WAS_POSITIONED,
+	/** The persistent cursor is positioned by optimistic get to the same
+	record as it was positioned at. Not used for rel_pos == BTR_PCUR_ON.
+	It may need adjustment depending on previous/current search direction
+	and rel_pos. */
+	BTR_PCUR_IS_POSITIONED_OPTIMISTIC,
+	/** The persistent cursor is positioned by index search,
+	or by optimistic get for rel_pos == BTR_PCUR_ON. */
+	BTR_PCUR_IS_POSITIONED
+};
+
+/** The persistent B-tree cursor structure. This is used mainly for SQL
+selects, updates, and deletes. */
+
+struct btr_pcur_t{
+	btr_cur_t	btr_cur;	/*!< a B-tree cursor */
+	ulint		latch_mode;	/*!< see the TODO in enum pcur_pos_t!
+					BTR_SEARCH_LEAF, BTR_MODIFY_LEAF,
+					BTR_MODIFY_TREE, or BTR_NO_LATCHES,
+					depending on the latching state of
+					the page and tree where the cursor is
+					positioned; BTR_NO_LATCHES means that
+					the cursor is not currently positioned:
+					we say then that the cursor is
+					detached; it can be restored to
+					attached if the old position was
+					stored in old_rec */
+	ulint		old_stored;	/*!< BTR_PCUR_OLD_STORED
+					or BTR_PCUR_OLD_NOT_STORED */
+	rec_t*		old_rec;	/*!< if cursor position is stored,
+					contains an initial segment of the
+					latest record cursor was positioned
+					either on, before, or after */
+	ulint		old_n_fields;	/*!< number of fields in old_rec */
+	ulint		rel_pos;	/*!< BTR_PCUR_ON, BTR_PCUR_BEFORE, or
+					BTR_PCUR_AFTER, depending on whether
+					cursor was on, before, or after the
+					old_rec record */
+	buf_block_t*	block_when_stored;/*!< buffer block when the position
+					was stored */
+	ib_uint64_t	modify_clock;	/*!< the modify clock value of the
+					buffer block when the cursor position
+					was stored */
+	enum pcur_pos_t	pos_state;	/*!< btr_pcur_store_position() and
+					btr_pcur_restore_position() state. */
+	ulint		search_mode;	/*!< PAGE_CUR_G, ... */
+	trx_t*		trx_if_known;	/*!< the transaction, if we know it;
+					otherwise this field is not defined;
+					can ONLY BE USED in error prints in
+					fatal assertion failures! */
+	/*-----------------------------*/
+	/* NOTE that the following fields may possess dynamically allocated
+	memory which should be freed if not needed anymore! */
+
+	byte*		old_rec_buf;	/*!< NULL, or a dynamically allocated
+					buffer for old_rec */
+	ulint		buf_size;	/*!< old_rec_buf size if old_rec_buf
+					is not NULL */
+};
+
+#define BTR_PCUR_OLD_STORED	908467085
+#define BTR_PCUR_OLD_NOT_STORED	122766467
+
+#ifndef UNIV_NONINL
+#include "btr0pcur.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/btr0pcur.ic b/storage/innobase/include/btr0pcur.ic
new file mode 100644
index 00000000000..7e355d3709d
--- /dev/null
+++ b/storage/innobase/include/btr0pcur.ic
@@ -0,0 +1,606 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/btr0pcur.ic
+The index tree persistent cursor
+
+Created 2/23/1996 Heikki Tuuri
+*******************************************************/
+
+
+/*********************************************************//**
+Reads the rel_pos field of a cursor whose position has been stored.
+@return	value of cursor->rel_pos: BTR_PCUR_ON, ... */
+UNIV_INLINE
+ulint
+btr_pcur_get_rel_pos(
+/*=================*/
+	const btr_pcur_t*	cursor)	/*!< in: persistent cursor */
+{
+	/* Sanity checks: a stored old_rec must be present. */
+	ut_ad(cursor);
+	ut_ad(cursor->old_rec);
+	ut_ad(cursor->old_stored == BTR_PCUR_OLD_STORED);
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED
+	      || cursor->pos_state == BTR_PCUR_WAS_POSITIONED);
+	return(cursor->rel_pos);
+}
+
+#ifdef UNIV_DEBUG
+/*********************************************************//**
+Gives access to the btr cursor embedded in a persistent cursor.
+@return	pointer to cursor->btr_cur, with the const qualifier cast away */
+UNIV_INLINE
+btr_cur_t*
+btr_pcur_get_btr_cur(
+/*=================*/
+	const btr_pcur_t*	cursor)	/*!< in: persistent cursor */
+{
+	/* The caller receives a non-const pointer into a const object. */
+	return((btr_cur_t*) &cursor->btr_cur);
+}
+
+/*********************************************************//**
+@return	pointer to the page cursor inside the persistent cursor */
+UNIV_INLINE
+page_cur_t*
+btr_pcur_get_page_cur(
+/*==================*/
+	const btr_pcur_t*	cursor)	/*!< in: persistent cursor */
+{
+	btr_cur_t*	tree_cur = btr_pcur_get_btr_cur(cursor);
+	return(btr_cur_get_page_cur(tree_cur));
+}
+
+/*********************************************************//**
+Looks up the page on which a persistent cursor is positioned.
+@return	pointer to the page */
+UNIV_INLINE
+page_t*
+btr_pcur_get_page(
+/*==============*/
+	const btr_pcur_t*	cursor)	/*!< in: persistent cursor */
+{
+	btr_cur_t*	tree_cur = btr_pcur_get_btr_cur(cursor);
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	return(btr_cur_get_page(tree_cur));
+}
+
+/*********************************************************//**
+Looks up the buffer block on which a persistent cursor is positioned.
+@return	pointer to the block */
+UNIV_INLINE
+buf_block_t*
+btr_pcur_get_block(
+/*===============*/
+	const btr_pcur_t*	cursor)	/*!< in: persistent cursor */
+{
+	btr_cur_t*	tree_cur = btr_pcur_get_btr_cur(cursor);
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	return(btr_cur_get_block(tree_cur));
+}
+
+/*********************************************************//**
+Looks up the record on which a persistent cursor is positioned.
+@return	pointer to the record */
+UNIV_INLINE
+rec_t*
+btr_pcur_get_rec(
+/*=============*/
+	const btr_pcur_t*	cursor)	/*!< in: persistent cursor */
+{
+	btr_cur_t*	tree_cur = btr_pcur_get_btr_cur(cursor);
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+	return(btr_cur_get_rec(tree_cur));
+}
+#endif /* UNIV_DEBUG */
+
+/**************************************************************//**
+Reads the up_match value left in a pcur by a search.
+@return number of matched fields at the cursor or to the right if
+search mode was PAGE_CUR_GE, otherwise undefined */
+UNIV_INLINE
+ulint
+btr_pcur_get_up_match(
+/*==================*/
+	const btr_pcur_t*	cursor) /*!< in: persistent cursor */
+{
+	const btr_cur_t*	tree_cur = btr_pcur_get_btr_cur(cursor);
+	ulint			n_matched = tree_cur->up_match;
+
+	ut_ad((cursor->pos_state == BTR_PCUR_WAS_POSITIONED)
+	      || (cursor->pos_state == BTR_PCUR_IS_POSITIONED));
+
+	/* An undefined match count is not expected here. */
+	ut_ad(n_matched != ULINT_UNDEFINED);
+
+	return(n_matched);
+}
+
+/**************************************************************//**
+Reads the low_match value left in a pcur by a search.
+@return number of matched fields at the cursor or to the left if search mode
+was PAGE_CUR_LE, else undefined (NOTE(review): confirm 'left' vs. 'right') */
+UNIV_INLINE
+ulint
+btr_pcur_get_low_match(
+/*===================*/
+	const btr_pcur_t*	cursor) /*!< in: persistent cursor */
+{
+	const btr_cur_t*	tree_cur = btr_pcur_get_btr_cur(cursor);
+	ulint			n_matched = tree_cur->low_match;
+
+	ut_ad((cursor->pos_state == BTR_PCUR_WAS_POSITIONED)
+	      || (cursor->pos_state == BTR_PCUR_IS_POSITIONED));
+
+	ut_ad(n_matched != ULINT_UNDEFINED);
+
+	return(n_matched);
+}
+
+/*********************************************************//**
+Tests whether the persistent cursor is positioned after the last user
+record on its page; the result comes from page_cur_is_after_last(). */
+UNIV_INLINE
+ibool
+btr_pcur_is_after_last_on_page(
+/*===========================*/
+	const btr_pcur_t*	cursor)	/*!< in: persistent cursor */
+{
+	const page_cur_t*	page_cur = btr_pcur_get_page_cur(cursor);
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+	return(page_cur_is_after_last(page_cur));
+}
+
+/*********************************************************//**
+Tests whether the persistent cursor is positioned before the first user
+record on its page; the result comes from page_cur_is_before_first(). */
+UNIV_INLINE
+ibool
+btr_pcur_is_before_first_on_page(
+/*=============================*/
+	const btr_pcur_t*	cursor)	/*!< in: persistent cursor */
+{
+	const page_cur_t*	page_cur = btr_pcur_get_page_cur(cursor);
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+	return(page_cur_is_before_first(page_cur));
+}
+
+/*********************************************************//**
+Tests whether the persistent cursor is on a user record, that is, neither
+before the first nor after the last user record on the page. */
+UNIV_INLINE
+ibool
+btr_pcur_is_on_user_rec(
+/*====================*/
+	const btr_pcur_t*	cursor)	/*!< in: persistent cursor */
+{
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+	/* Both page boundary positions are excluded. */
+	if (btr_pcur_is_before_first_on_page(cursor)) {
+		return(FALSE);
+	}
+
+	return(btr_pcur_is_after_last_on_page(cursor) ? FALSE : TRUE);
+}
+
+/*********************************************************//**
+Tests whether the persistent cursor is before the first user record in
+the index tree. */
+UNIV_INLINE
+ibool
+btr_pcur_is_before_first_in_tree(
+/*=============================*/
+	btr_pcur_t*	cursor,	/*!< in: persistent cursor */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+	/* A page that has a previous page cannot qualify. */
+	if (btr_page_get_prev(btr_pcur_get_page(cursor), mtr) == FIL_NULL) {
+		return(page_cur_is_before_first(
+			       btr_pcur_get_page_cur(cursor)));
+	}
+	return(FALSE);
+}
+
+/*********************************************************//**
+Tests whether the persistent cursor is after the last user record in
+the index tree. */
+UNIV_INLINE
+ibool
+btr_pcur_is_after_last_in_tree(
+/*===========================*/
+	btr_pcur_t*	cursor,	/*!< in: persistent cursor */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+	/* A page that has a next page cannot qualify. */
+	if (btr_page_get_next(btr_pcur_get_page(cursor), mtr) == FIL_NULL) {
+		return(page_cur_is_after_last(
+			       btr_pcur_get_page_cur(cursor)));
+	}
+	return(FALSE);
+}
+
+/*********************************************************//**
+Advances the persistent cursor by one record within its current page. */
+UNIV_INLINE
+void
+btr_pcur_move_to_next_on_page(
+/*==========================*/
+	btr_pcur_t*	cursor)	/*!< in/out: persistent cursor */
+{
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+	/* The cursor moves, so mark the old position as not stored. */
+	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+	page_cur_move_to_next(btr_pcur_get_page_cur(cursor));
+}
+
+/*********************************************************//**
+Steps the persistent cursor back by one record within its current page. */
+UNIV_INLINE
+void
+btr_pcur_move_to_prev_on_page(
+/*==========================*/
+	btr_pcur_t*	cursor)	/*!< in/out: persistent cursor */
+{
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+	/* The cursor moves, so mark the old position as not stored. */
+	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+	page_cur_move_to_prev(btr_pcur_get_page_cur(cursor));
+}
+
+/*********************************************************//**
+Positions the persistent cursor after the last record on the same page
+(calls page_cur_set_after_last()); the mtr argument is not used. */
+UNIV_INLINE
+void
+btr_pcur_move_to_last_on_page(
+/*==========================*/
+	btr_pcur_t*	cursor,	/*!< in/out: persistent cursor */
+	mtr_t*		mtr)	/*!< in: mtr (unused) */
+{
+	UT_NOT_USED(mtr);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+	page_cur_set_after_last(btr_pcur_get_block(cursor),
+				btr_pcur_get_page_cur(cursor));
+}
+
+/*********************************************************//**
+Advances the persistent cursor to the next user record in the tree. When no
+user record remains, the cursor is left 'after last in tree'.
+@return	TRUE if the cursor moved forward, ending on a user record */
+UNIV_INLINE
+ibool
+btr_pcur_move_to_next_user_rec(
+/*===========================*/
+	btr_pcur_t*	cursor,	/*!< in: persistent cursor; NOTE that the
+				function may release the page latch */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+
+	/* Step record by record, crossing page boundaries as needed,
+	until a user record is reached or the tree is exhausted. */
+	for (;;) {
+		if (!btr_pcur_is_after_last_on_page(cursor)) {
+			btr_pcur_move_to_next_on_page(cursor);
+		} else if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {
+			/* Nothing follows the current position. */
+			return(FALSE);
+		} else {
+			btr_pcur_move_to_next_page(cursor, mtr);
+		}
+
+		if (btr_pcur_is_on_user_rec(cursor)) {
+
+			return(TRUE);
+		}
+	}
+}
+
+/*********************************************************//**
+Advances the persistent cursor to the next record in the tree. When no
+records are left, the cursor stays 'after last in tree'.
+@return	TRUE if the cursor was not after last in tree */
+UNIV_INLINE
+ibool
+btr_pcur_move_to_next(
+/*==================*/
+	btr_pcur_t*	cursor,	/*!< in: persistent cursor; NOTE that the
+				function may release the page latch */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+
+	if (!btr_pcur_is_after_last_on_page(cursor)) {
+		/* Not yet past the page end: step within the page. */
+		btr_pcur_move_to_next_on_page(cursor);
+
+	} else if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {
+		/* Already after the last record of the tree: stay put. */
+		return(FALSE);
+
+	} else {
+		/* Continue on the following page. */
+		btr_pcur_move_to_next_page(cursor, mtr);
+	}
+
+	/* The cursor was not after last in tree. */
+	return(TRUE);
+}
+
+/**************************************************************//**
+Commits the mtr, sets the pcur latch mode to BTR_NO_LATCHES (that is, the
+cursor becomes detached) and changes pos_state to BTR_PCUR_WAS_POSITIONED.
+Function btr_pcur_store_position should be used before calling this,
+if restoration of cursor is wanted later. */
+UNIV_INLINE
+void
+btr_pcur_commit_specify_mtr(
+/*========================*/
+	btr_pcur_t*	pcur,	/*!< in/out: persistent cursor */
+	mtr_t*		mtr)	/*!< in: mtr to commit */
+{
+	ut_ad(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
+	/* Detach the cursor first, then commit the mini-transaction. */
+	pcur->latch_mode = BTR_NO_LATCHES;
+
+	mtr_commit(mtr);
+	/* The cursor is no longer positioned, but it was positioned. */
+	pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
+}
+
+/**************************************************************//**
+Resets the stored-position fields: old_rec, old_rec_buf and old_stored. */
+UNIV_INLINE
+void
+btr_pcur_init(
+/*==========*/
+	btr_pcur_t*	pcur)	/*!< in/out: persistent cursor */
+{
+	pcur->old_rec = NULL;
+	pcur->old_rec_buf = NULL;
+	pcur->old_stored = BTR_PCUR_OLD_NOT_STORED;
+}
+
+/**************************************************************//**
+Initializes a persistent cursor and opens it on an index tree. The cursor
+should be closed with btr_pcur_close. */
+UNIV_INLINE
+void
+btr_pcur_open_low(
+/*==============*/
+	dict_index_t*	index,	/*!< in: index */
+	ulint		level,	/*!< in: level in the btree */
+	const dtuple_t*	tuple,	/*!< in: tuple on which search done */
+	ulint		mode,	/*!< in: PAGE_CUR_L, ...;
+				NOTE that if the search is made using a unique
+				prefix of a record, mode should be
+				PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
+				may end up on the previous page from the
+				record! */
+	ulint		latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
+	btr_pcur_t*	cursor, /*!< in: memory buffer for persistent cursor */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	/* Initialize the stored-position fields of the cursor. */
+	btr_pcur_init(cursor);
+
+	/* The pcur stores BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
+	the search below receives latch_mode unmodified. */
+	cursor->search_mode = mode;
+	cursor->latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
+
+	/* Search with the embedded tree cursor; the literal 0 fills
+	the has_search_latch slot of the callee. */
+	btr_cur_search_to_nth_level(index, level, tuple, mode, latch_mode,
+				    btr_pcur_get_btr_cur(cursor), 0,
+				    file, line, mtr);
+
+	/* The search is done: record the new cursor state. */
+	cursor->pos_state = BTR_PCUR_IS_POSITIONED;
+
+	cursor->trx_if_known = NULL;
+}
+
+/**************************************************************//**
+Opens a persistent cursor to an index tree without initializing the
+cursor. */
+UNIV_INLINE
+void
+btr_pcur_open_with_no_init_func(
+/*============================*/
+	dict_index_t*	index,	/*!< in: index */
+	const dtuple_t*	tuple,	/*!< in: tuple on which search done */
+	ulint		mode,	/*!< in: PAGE_CUR_L, ...;
+				NOTE that if the search is made using a unique
+				prefix of a record, mode should be
+				PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
+				may end up on the previous page of the
+				record! */
+	ulint		latch_mode,/*!< in: BTR_SEARCH_LEAF, ...;
+				NOTE that if has_search_latch != 0 then
+				we maybe do not acquire a latch on the cursor
+				page, but assume that the caller uses his
+				btr search latch to protect the record! */
+	btr_pcur_t*	cursor, /*!< in: memory buffer for persistent cursor */
+	ulint		has_search_latch,/*!< in: latch mode the caller
+				currently has on btr_search_latch:
+				RW_S_LATCH, or 0 */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	/* Unlike btr_pcur_open_low(), this function does not call
+	btr_pcur_init() and stores latch_mode exactly as passed in. */
+
+	cursor->search_mode = mode;
+	cursor->latch_mode = latch_mode;
+
+	/* Search at level 0 with the embedded tree cursor. */
+	btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
+				    btr_pcur_get_btr_cur(cursor),
+				    has_search_latch, file, line, mtr);
+
+	/* Record the resulting cursor state. */
+	cursor->pos_state = BTR_PCUR_IS_POSITIONED;
+
+	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+
+	cursor->trx_if_known = NULL;
+}
+
+/*****************************************************************//**
+Opens a persistent cursor at the low or the high end of an index. */
+UNIV_INLINE
+void
+btr_pcur_open_at_index_side(
+/*========================*/
+	bool		from_left,	/*!< in: true if open to the low end,
+					false if to the high end */
+	dict_index_t*	index,		/*!< in: index */
+	ulint		latch_mode,	/*!< in: latch mode */
+	btr_pcur_t*	pcur,		/*!< in/out: cursor */
+	bool		init_pcur,	/*!< in: whether to initialize pcur */
+	ulint		level,		/*!< in: level to search for
+					(0=leaf) */
+	mtr_t*		mtr)		/*!< in/out: mini-transaction */
+{
+	if (init_pcur) {
+		btr_pcur_init(pcur);
+	}
+
+	/* PAGE_CUR_G from the low end, PAGE_CUR_L from the high end. */
+	pcur->search_mode = from_left ? PAGE_CUR_G : PAGE_CUR_L;
+	pcur->latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
+
+	btr_cur_open_at_index_side(from_left, index, latch_mode,
+				   btr_pcur_get_btr_cur(pcur), level, mtr);
+
+	/* Record the resulting cursor state. */
+	pcur->pos_state = BTR_PCUR_IS_POSITIONED;
+	pcur->old_stored = BTR_PCUR_OLD_NOT_STORED;
+	pcur->trx_if_known = NULL;
+}
+
+/**********************************************************************//**
+Opens a persistent cursor at a randomly chosen position within a B-tree. */
+UNIV_INLINE
+void
+btr_pcur_open_at_rnd_pos_func(
+/*==========================*/
+	dict_index_t*	index,		/*!< in: index */
+	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ... */
+	btr_pcur_t*	cursor,		/*!< in/out: B-tree pcur */
+	const char*	file,		/*!< in: file name */
+	ulint		line,		/*!< in: line where called */
+	mtr_t*		mtr)		/*!< in: mtr */
+{
+	/* Reset the stored-position fields, then set the modes. */
+	btr_pcur_init(cursor);
+
+	cursor->search_mode = PAGE_CUR_G;
+	cursor->latch_mode = latch_mode;
+
+	/* Let the embedded tree cursor pick the position. */
+	btr_cur_open_at_rnd_pos_func(index, latch_mode,
+				     btr_pcur_get_btr_cur(cursor),
+				     file, line, mtr);
+
+	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+	cursor->pos_state = BTR_PCUR_IS_POSITIONED;
+	cursor->trx_if_known = NULL;
+}
+
+/**************************************************************//**
+Frees the old_rec buffer of a persistent cursor, if one is allocated, and
+resets the cursor: latch mode BTR_NO_LATCHES, state BTR_PCUR_NOT_POSITIONED.
+WARNING: this function does not release the latch on the page where the
+cursor is currently positioned. The latch is acquired by the
+"move to next/previous" family of functions. Since recursive shared locks
+are not allowed, you must take care (if using the cursor in S-mode) to
+manually release the latch by either calling
+btr_leaf_page_release(btr_pcur_get_block(&pcur), pcur.latch_mode, mtr)
+or by committing the mini-transaction right after btr_pcur_close().
+A subsequent attempt to crawl the same page in the same mtr would cause
+an assertion failure. */
+UNIV_INLINE
+void
+btr_pcur_close(
+/*===========*/
+	btr_pcur_t*	cursor)	/*!< in/out: persistent cursor */
+{
+	if (cursor->old_rec_buf != NULL) {
+		/* Release the dynamically allocated old_rec buffer. */
+		mem_free(cursor->old_rec_buf);
+		cursor->old_rec_buf = NULL;
+	}
+
+	/* Forget the stored position; old_rec is cleared regardless of
+	whether a buffer was freed above. */
+	cursor->old_rec = NULL;
+	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+
+	/* Clear the page cursor; mark the pcur detached and unpositioned. */
+	cursor->btr_cur.page_cur.rec = NULL;
+	cursor->btr_cur.page_cur.block = NULL;
+	cursor->latch_mode = BTR_NO_LATCHES;
+	cursor->pos_state = BTR_PCUR_NOT_POSITIONED;
+	cursor->trx_if_known = NULL;
+}
+
+/*********************************************************//**
+Places the persistent cursor on the infimum record of its current page. */
+UNIV_INLINE
+void
+btr_pcur_move_before_first_on_page(
+/*===============================*/
+	btr_pcur_t*	cursor) /*!< in/out: persistent cursor */
+{
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+	/* The cursor moves, so mark the old position as not stored. */
+	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+	page_cur_set_before_first(btr_pcur_get_block(cursor),
+				  btr_pcur_get_page_cur(cursor));
+}
diff --git a/storage/innobase/include/btr0sea.h b/storage/innobase/include/btr0sea.h
new file mode 100644
index 00000000000..848bde451a0
--- /dev/null
+++ b/storage/innobase/include/btr0sea.h
@@ -0,0 +1,288 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/btr0sea.h
+The index tree adaptive search
+
+Created 2/17/1996 Heikki Tuuri
+*************************************************************************/
+
+#ifndef btr0sea_h
+#define btr0sea_h
+
+#include "univ.i"
+
+#include "rem0rec.h"
+#include "dict0dict.h"
+#include "btr0types.h"
+#include "mtr0mtr.h"
+#include "ha0ha.h"
+
+/*****************************************************************//**
+Creates and initializes the adaptive search system at a database start. */
+UNIV_INTERN
+void
+btr_search_sys_create(
+/*==================*/
+	ulint	hash_size);	/*!< in: hash index hash table size */
+/*****************************************************************//**
+Frees the adaptive search system at a database shutdown. */
+UNIV_INTERN
+void
+btr_search_sys_free(void);
+/*=====================*/
+
+/********************************************************************//**
+Disable the adaptive hash search system and empty the index. */
+UNIV_INTERN
+void
+btr_search_disable(void);
+/*====================*/
+/********************************************************************//**
+Enable the adaptive hash search system. */
+UNIV_INTERN
+void
+btr_search_enable(void);
+/*====================*/
+
+/********************************************************************//**
+Returns search info for an index.
+@return	search info; search mutex reserved */
+UNIV_INLINE
+btr_search_t*
+btr_search_get_info(
+/*================*/
+	dict_index_t*	index)	/*!< in: index */
+	__attribute__((nonnull));
+/*****************************************************************//**
+Creates and initializes a search info struct.
+@return	own: search info struct */
+UNIV_INTERN
+btr_search_t*
+btr_search_info_create(
+/*===================*/
+	mem_heap_t*	heap);	/*!< in: heap where created */
+/*****************************************************************//**
+Returns the value of ref_count. The value is protected by
+btr_search_latch.
+@return	ref_count value. */
+UNIV_INTERN
+ulint
+btr_search_info_get_ref_count(
+/*==========================*/
+	btr_search_t*   info);	/*!< in: search info. */
+/*********************************************************************//**
+Updates the search info. */
+UNIV_INLINE
+void
+btr_search_info_update(
+/*===================*/
+	dict_index_t*	index,	/*!< in: index of the cursor */
+	btr_cur_t*	cursor);/*!< in: cursor which was just positioned */
+/******************************************************************//**
+Tries to guess the right search position based on the hash search info
+of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts,
+and the function returns TRUE, then cursor->up_match and cursor->low_match
+both have sensible values.
+@return	TRUE if succeeded */
+UNIV_INTERN
+ibool
+btr_search_guess_on_hash(
+/*=====================*/
+	dict_index_t*	index,		/*!< in: index */
+	btr_search_t*	info,		/*!< in: index search info */
+	const dtuple_t*	tuple,		/*!< in: logical record */
+	ulint		mode,		/*!< in: PAGE_CUR_L, ... */
+	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ... */
+	btr_cur_t*	cursor,		/*!< out: tree cursor */
+	ulint		has_search_latch,/*!< in: latch mode the caller
+					currently has on btr_search_latch:
+					RW_S_LATCH, RW_X_LATCH, or 0 */
+	mtr_t*		mtr);		/*!< in: mtr */
+/********************************************************************//**
+Moves or deletes hash entries for moved records. If new_block is already
+hashed, then the hash index for block, if any, is dropped. If new_block is not
+hashed, and block is hashed, then a new hash index is built on new_block with
+the same parameters as block (this often happens when a page is split). */
+UNIV_INTERN
+void
+btr_search_move_or_delete_hash_entries(
+/*===================================*/
+	buf_block_t*	new_block,	/*!< in: records are copied
+					to this page */
+	buf_block_t*	block,		/*!< in: index page from which
+					records were copied, and the
+					copied records will be deleted
+					from this page */
+	dict_index_t*	index);		/*!< in: record descriptor */
+/********************************************************************//**
+Drops a page hash index. */
+UNIV_INTERN
+void
+btr_search_drop_page_hash_index(
+/*============================*/
+	buf_block_t*	block);	/*!< in: block containing index page,
+				s- or x-latched, or an index page
+				for which we know that
+				block->buf_fix_count == 0 */
+/********************************************************************//**
+Drops a possible page hash index when a page is evicted from the buffer pool
+or freed in a file segment. */
+UNIV_INTERN
+void
+btr_search_drop_page_hash_when_freed(
+/*=================================*/
+	ulint	space,		/*!< in: space id */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	page_no);	/*!< in: page number */
+/********************************************************************//**
+Updates the page hash index when a single record is inserted on a page. */
+UNIV_INTERN
+void
+btr_search_update_hash_node_on_insert(
+/*==================================*/
+	btr_cur_t*	cursor);/*!< in: cursor which was positioned to the
+				place to insert using btr_cur_search_...,
+				and the new record has been inserted next
+				to the cursor */
+/********************************************************************//**
+Updates the page hash index when a single record is inserted on a page. */
+UNIV_INTERN
+void
+btr_search_update_hash_on_insert(
+/*=============================*/
+	btr_cur_t*	cursor);/*!< in: cursor which was positioned to the
+				place to insert using btr_cur_search_...,
+				and the new record has been inserted next
+				to the cursor */
+/********************************************************************//**
+Updates the page hash index when a single record is deleted from a page. */
+UNIV_INTERN
+void
+btr_search_update_hash_on_delete(
+/*=============================*/
+	btr_cur_t*	cursor);/*!< in: cursor which was positioned on the
+				record to delete using btr_cur_search_...,
+				the record is not yet deleted */
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+/********************************************************************//**
+Validates the search system.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+btr_search_validate(void);
+/*======================*/
+#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
+
+/** The search info struct in an index */
+struct btr_search_t{
+	ulint	ref_count;	/*!< Number of blocks in this index tree
+				that have search index built
+				i.e. block->index points to this index.
+				Protected by btr_search_latch except
+				during initialization in
+				btr_search_info_create(). */
+
+	/* @{ The following fields are not protected by any latch.
+	Unfortunately, this means that they must be aligned to
+	the machine word, i.e., they cannot be turned into bit-fields. */
+	buf_block_t* root_guess;/*!< the root page frame when it was last
+				fetched, or NULL */
+	ulint	hash_analysis;	/*!< when this exceeds
+				BTR_SEARCH_HASH_ANALYSIS, the hash
+				analysis starts; this is reset if no
+				success noticed */
+	ibool	last_hash_succ;	/*!< TRUE if the last search would have
+				succeeded, or did succeed, using the hash
+				index; NOTE that the value here is not exact:
+				it is not calculated for every search, and the
+				calculation itself is not always accurate! */
+	ulint	n_hash_potential;
+				/*!< number of consecutive searches
+				which would have succeeded, or did succeed,
+				using the hash index;
+				the range is 0 .. BTR_SEARCH_BUILD_LIMIT + 5 */
+	/* @} */
+	/*---------------------- @{ */
+	ulint	n_fields;	/*!< recommended prefix length for hash search:
+				number of full fields */
+	ulint	n_bytes;	/*!< recommended prefix: number of bytes in
+				an incomplete field
+				@see BTR_PAGE_MAX_REC_SIZE */
+	ibool	left_side;	/*!< TRUE or FALSE, depending on whether
+				the leftmost record of several records with
+				the same prefix should be indexed in the
+				hash index */
+	/*---------------------- @} */
+#ifdef UNIV_SEARCH_PERF_STAT
+	ulint	n_hash_succ;	/*!< number of successful hash searches thus
+				far */
+	ulint	n_hash_fail;	/*!< number of failed hash searches */
+	ulint	n_patt_succ;	/*!< number of successful pattern searches thus
+				far */
+	ulint	n_searches;	/*!< number of searches */
+#endif /* UNIV_SEARCH_PERF_STAT */
+#ifdef UNIV_DEBUG
+	ulint	magic_n;	/*!< magic number @see BTR_SEARCH_MAGIC_N */
+/** value of btr_search_t::magic_n, used in assertions */
+# define BTR_SEARCH_MAGIC_N	1112765
+#endif /* UNIV_DEBUG */
+};
+
+/** The hash index system: holds the adaptive hash index table */
+struct btr_search_sys_t{
+	hash_table_t*	hash_index;	/*!< the adaptive hash index,
+					mapping dtuple_fold values
+					to rec_t pointers on index pages */
+};
+
+/** The adaptive hash index */
+extern btr_search_sys_t*	btr_search_sys;
+
+#ifdef UNIV_SEARCH_PERF_STAT
+/** Number of successful adaptive hash index lookups */
+extern ulint	btr_search_n_succ;
+/** Number of failed adaptive hash index lookups */
+extern ulint	btr_search_n_hash_fail;
+#endif /* UNIV_SEARCH_PERF_STAT */
+
+/** After change in n_fields or n_bytes in info, this many rounds are waited
+before starting the hash analysis again: this is to save CPU time when there
+is no hope in building a hash index. */
+#define BTR_SEARCH_HASH_ANALYSIS	17
+
+/** Limit of consecutive searches for trying a search shortcut on the search
+pattern */
+#define BTR_SEARCH_ON_PATTERN_LIMIT	3
+
+/** Limit of consecutive searches for trying a search shortcut using
+the hash index */
+#define BTR_SEARCH_ON_HASH_LIMIT	3
+
+/** We do this many searches before trying to keep the search latch
+over calls from MySQL. If we notice someone waiting for the latch, we
+again set this much timeout. This is to reduce contention. */
+#define BTR_SEA_TIMEOUT			10000
+
+#ifndef UNIV_NONINL
+#include "btr0sea.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/btr0sea.ic b/storage/innobase/include/btr0sea.ic
new file mode 100644
index 00000000000..0bd869be136
--- /dev/null
+++ b/storage/innobase/include/btr0sea.ic
@@ -0,0 +1,82 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/btr0sea.ic
+The index tree adaptive search
+
+Created 2/17/1996 Heikki Tuuri
+*************************************************************************/
+
+#include "dict0mem.h"
+#include "btr0cur.h"
+#include "buf0buf.h"
+
+/*********************************************************************//**
+Updates the search info. */
+UNIV_INTERN
+void
+btr_search_info_update_slow(
+/*========================*/
+	btr_search_t*	info,	/*!< in/out: search info */
+	btr_cur_t*	cursor);/*!< in: cursor which was just positioned */
+
+/********************************************************************//**
+@return	index->search_info; this accessor itself takes no mutex or latch */
+UNIV_INLINE
+btr_search_t*
+btr_search_get_info(
+/*================*/
+	dict_index_t*	index)	/*!< in: index */
+{
+	btr_search_t*	info = index->search_info;
+	return(info);
+}
+
+/*********************************************************************//**
+Counts a search on the index and, once hash_analysis has reached
+BTR_SEARCH_HASH_ANALYSIS, hands over to btr_search_info_update_slow(). */
+UNIV_INLINE
+void
+btr_search_info_update(
+/*===================*/
+	dict_index_t*	index,	/*!< in: index of the cursor */
+	btr_cur_t*	cursor)	/*!< in: cursor which was just positioned */
+{
+	btr_search_t*	info = btr_search_get_info(index);
+
+#ifdef UNIV_SYNC_DEBUG
+	/* Assert that btr_search_latch is not owned in S or X mode. */
+	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
+	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+	/* Count this search. */
+	info->hash_analysis++;
+
+	if (info->hash_analysis >= BTR_SEARCH_HASH_ANALYSIS) {
+
+		/* The counter has reached the analysis limit. */
+
+		ut_ad(cursor->flag != BTR_CUR_HASH);
+
+		btr_search_info_update_slow(info, cursor);
+	}
+
+	/* Otherwise: do nothing */
+}
diff --git a/storage/innobase/include/btr0types.h b/storage/innobase/include/btr0types.h
new file mode 100644
index 00000000000..c1a4531f861
--- /dev/null
+++ b/storage/innobase/include/btr0types.h
@@ -0,0 +1,203 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/btr0types.h
+The index tree general types
+
+Created 2/17/1996 Heikki Tuuri
+*************************************************************************/
+
+#ifndef btr0types_h
+#define btr0types_h
+
+#include "univ.i"
+
+#include "rem0types.h"
+#include "page0types.h"
+#include "sync0rw.h"
+
+/** Persistent cursor */
+struct btr_pcur_t;
+/** B-tree cursor */
+struct btr_cur_t;
+/** B-tree search information for the adaptive hash index */
+struct btr_search_t;
+
+#ifndef UNIV_HOTBACKUP
+
+/** @brief The latch protecting the adaptive search system
+
+This latch protects the
+(1) hash index;
+(2) columns of a record to which we have a pointer in the hash index;
+
+but does NOT protect:
+
+(3) next record offset field in a record;
+(4) next or previous records on the same page.
+
+Bear in mind (3) and (4) when using the hash index.
+*/
+extern rw_lock_t*	btr_search_latch_temp;
+
+#endif /* UNIV_HOTBACKUP */
+
+/** The latch protecting the adaptive search system */
+#define btr_search_latch	(*btr_search_latch_temp)
+
+/** Flag: has the search system been enabled?
+Protected by btr_search_latch. */
+extern char	btr_search_enabled;
+
+#ifdef UNIV_BLOB_DEBUG
+# include "buf0types.h"
+/** An index->blobs entry for keeping track of off-page column references */
+struct btr_blob_dbg_t;
+
+/** Insert to index->blobs a reference to an off-page column.
+@param index	the index tree
+@param b	the reference
+@param ctx	context (for logging) */
+UNIV_INTERN
+void
+btr_blob_dbg_rbt_insert(
+/*====================*/
+	dict_index_t*		index,	/*!< in/out: index tree */
+	const btr_blob_dbg_t*	b,	/*!< in: the reference */
+	const char*		ctx)	/*!< in: context (for logging) */
+	__attribute__((nonnull));
+
+/** Remove from index->blobs a reference to an off-page column.
+@param index	the index tree
+@param b	the reference
+@param ctx	context (for logging) */
+UNIV_INTERN
+void
+btr_blob_dbg_rbt_delete(
+/*====================*/
+	dict_index_t*		index,	/*!< in/out: index tree */
+	const btr_blob_dbg_t*	b,	/*!< in: the reference */
+	const char*		ctx)	/*!< in: context (for logging) */
+	__attribute__((nonnull));
+
+/**************************************************************//**
+Add to index->blobs any references to off-page columns from a record.
+@return number of references added */
+UNIV_INTERN
+ulint
+btr_blob_dbg_add_rec(
+/*=================*/
+	const rec_t*	rec,	/*!< in: record */
+	dict_index_t*	index,	/*!< in/out: index */
+	const ulint*	offsets,/*!< in: offsets */
+	const char*	ctx)	/*!< in: context (for logging) */
+	__attribute__((nonnull));
+/**************************************************************//**
+Remove from index->blobs any references to off-page columns from a record.
+@return number of references removed */
+UNIV_INTERN
+ulint
+btr_blob_dbg_remove_rec(
+/*====================*/
+	const rec_t*	rec,	/*!< in: record */
+	dict_index_t*	index,	/*!< in/out: index */
+	const ulint*	offsets,/*!< in: offsets */
+	const char*	ctx)	/*!< in: context (for logging) */
+	__attribute__((nonnull));
+/**************************************************************//**
+Count and add to index->blobs any references to off-page columns
+from records on a page.
+@return number of references added */
+UNIV_INTERN
+ulint
+btr_blob_dbg_add(
+/*=============*/
+	const page_t*	page,	/*!< in: rewritten page */
+	dict_index_t*	index,	/*!< in/out: index */
+	const char*	ctx)	/*!< in: context (for logging) */
+	__attribute__((nonnull));
+/**************************************************************//**
+Count and remove from index->blobs any references to off-page columns
+from records on a page.
+Used when reorganizing a page, before copying the records.
+@return number of references removed */
+UNIV_INTERN
+ulint
+btr_blob_dbg_remove(
+/*================*/
+	const page_t*	page,	/*!< in: b-tree page */
+	dict_index_t*	index,	/*!< in/out: index */
+	const char*	ctx)	/*!< in: context (for logging) */
+	__attribute__((nonnull));
+/**************************************************************//**
+Restore in index->blobs any references to off-page columns
+Used when page reorganize fails due to compressed page overflow. */
+UNIV_INTERN
+void
+btr_blob_dbg_restore(
+/*=================*/
+	const page_t*	npage,	/*!< in: page that failed to compress */
+	const page_t*	page,	/*!< in: copy of original page */
+	dict_index_t*	index,	/*!< in/out: index */
+	const char*	ctx)	/*!< in: context (for logging) */
+	__attribute__((nonnull));
+
+/** Operation that processes the BLOB references of an index record
+@param[in]	rec	record on index page
+@param[in,out]	index	the index tree of the record
+@param[in]	offsets	rec_get_offsets(rec,index)
+@param[in]	ctx	context (for logging)
+@return			number of BLOB references processed */
+typedef ulint (*btr_blob_dbg_op_f)
+(const rec_t* rec,dict_index_t* index,const ulint* offsets,const char* ctx);
+
+/**************************************************************//**
+Count and process all references to off-page columns on a page.
+@return number of references processed */
+UNIV_INTERN
+ulint
+btr_blob_dbg_op(
+/*============*/
+	const page_t*		page,	/*!< in: B-tree leaf page */
+	const rec_t*		rec,	/*!< in: record to start from
+					(NULL to process the whole page) */
+	dict_index_t*		index,	/*!< in/out: index */
+	const char*		ctx,	/*!< in: context (for logging) */
+	const btr_blob_dbg_op_f	op)	/*!< in: operation on records */
+	__attribute__((nonnull(1,3,4,5)));
+#else /* UNIV_BLOB_DEBUG */
+# define btr_blob_dbg_add_rec(rec, index, offsets, ctx)		((void) 0)
+# define btr_blob_dbg_add(page, index, ctx)			((void) 0)
+# define btr_blob_dbg_remove_rec(rec, index, offsets, ctx)	((void) 0)
+# define btr_blob_dbg_remove(page, index, ctx)			((void) 0)
+# define btr_blob_dbg_restore(npage, page, index, ctx)		((void) 0)
+# define btr_blob_dbg_op(page, rec, index, ctx, op)		((void) 0)
+#endif /* UNIV_BLOB_DEBUG */
+
+/** The size of a reference to data stored on a different page.
+The reference is stored at the end of the prefix of the field
+in the index record. */
+#define BTR_EXTERN_FIELD_REF_SIZE	20
+
+/** A BLOB field reference full of zero, for use in assertions and tests.
+Initially, BLOB field references are set to zero, in
+dtuple_convert_big_rec(). */
+extern const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE];
+
+#endif
diff --git a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h
new file mode 100644
index 00000000000..fab9a4b828b
--- /dev/null
+++ b/storage/innobase/include/buf0buddy.h
@@ -0,0 +1,77 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0buddy.h
+Binary buddy allocator for compressed pages
+
+Created December 2006 by Marko Makela
+*******************************************************/
+
+#ifndef buf0buddy_h
+#define buf0buddy_h
+
+#ifdef UNIV_MATERIALIZE
+# undef UNIV_INLINE
+# define UNIV_INLINE
+#endif
+
+#include "univ.i"
+#include "buf0types.h"
+
+/**********************************************************************//**
+Allocate a block.  The thread calling this function must hold
+buf_pool->mutex and must not hold buf_pool->zip_mutex or any
+block->mutex.  The buf_pool->mutex may be released and reacquired.
+This function should only be used for allocating compressed page frames.
+@return	allocated block, never NULL */
+UNIV_INLINE
+byte*
+buf_buddy_alloc(
+/*============*/
+	buf_pool_t*	buf_pool,	/*!< in/out: buffer pool in which
+					the page resides */
+	ulint		size,		/*!< in: compressed page size
+					(between UNIV_ZIP_SIZE_MIN and
+					UNIV_PAGE_SIZE) */
+	ibool*		lru)		/*!< in: pointer to a variable
+					that will be assigned TRUE if
+				       	storage was allocated from the
+				       	LRU list and buf_pool->mutex was
+				       	temporarily released */
+	__attribute__((malloc, nonnull));
+
+/**********************************************************************//**
+Deallocate a block. */
+UNIV_INLINE
+void
+buf_buddy_free(
+/*===========*/
+	buf_pool_t*	buf_pool,	/*!< in/out: buffer pool in which
+					the block resides */
+	void*		buf,		/*!< in: block to be freed, must not
+					be pointed to by the buffer pool */
+	ulint		size)		/*!< in: block size,
+					up to UNIV_PAGE_SIZE */
+	__attribute__((nonnull));
+
+#ifndef UNIV_NONINL
+# include "buf0buddy.ic"
+#endif
+
+#endif /* buf0buddy_h */
diff --git a/storage/innobase/include/buf0buddy.ic b/storage/innobase/include/buf0buddy.ic
new file mode 100644
index 00000000000..be2f950162d
--- /dev/null
+++ b/storage/innobase/include/buf0buddy.ic
@@ -0,0 +1,143 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0buddy.ic
+Binary buddy allocator for compressed pages
+
+Created December 2006 by Marko Makela
+*******************************************************/
+
+#ifdef UNIV_MATERIALIZE
+# undef UNIV_INLINE
+# define UNIV_INLINE
+#endif
+
+#include "buf0buf.h"
+#include "buf0buddy.h"
+#include "ut0ut.h"
+#include "sync0sync.h"
+
+/**********************************************************************//**
+Allocate a block.  The thread calling this function must hold
+buf_pool->mutex and must not hold buf_pool->zip_mutex or any block->mutex.
+The buf_pool->mutex may be released and reacquired.
+@return	allocated block, never NULL */
+UNIV_INTERN
+void*
+buf_buddy_alloc_low(
+/*================*/
+	buf_pool_t*	buf_pool,	/*!< in/out: buffer pool instance */
+	ulint		i,		/*!< in: index of buf_pool->zip_free[],
+					or BUF_BUDDY_SIZES */
+	ibool*		lru)		/*!< in: pointer to a variable that
+					will be assigned TRUE if storage was
+					allocated from the LRU list and
+					buf_pool->mutex was temporarily
+					released */
+	__attribute__((malloc, nonnull));
+
+/**********************************************************************//**
+Deallocate a block. */
+UNIV_INTERN
+void
+buf_buddy_free_low(
+/*===============*/
+	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
+	void*		buf,		/*!< in: block to be freed, must not be
+					pointed to by the buffer pool */
+	ulint		i)		/*!< in: index of buf_pool->zip_free[],
+					or BUF_BUDDY_SIZES */
+	__attribute__((nonnull));
+
+/**********************************************************************//**
+Find the smallest buf_pool->zip_free[] slot whose block size is >= size.
+@return	index of buf_pool->zip_free[], or BUF_BUDDY_SIZES */
+UNIV_INLINE
+ulint
+buf_buddy_get_slot(
+/*===============*/
+	ulint	size)	/*!< in: block size */
+{
+	ulint	slot		= 0;
+	ulint	slot_size	= BUF_BUDDY_LOW;
+
+	ut_ad(size >= UNIV_ZIP_SIZE_MIN);
+
+	for (; slot_size < size; slot_size <<= 1) {
+		slot++;
+	}
+	ut_ad(slot <= BUF_BUDDY_SIZES);
+	return(slot);
+}
+
+/**********************************************************************//**
+Allocate a block for a compressed page frame.  The calling thread must
+hold buf_pool->mutex and must not hold buf_pool->zip_mutex or any
+block->mutex.  The buf_pool->mutex may be released and reacquired.
+@return	allocated block, never NULL */
+UNIV_INLINE
+byte*
+buf_buddy_alloc(
+/*============*/
+	buf_pool_t*	buf_pool,	/*!< in/out: buffer pool in which
+					the page resides */
+	ulint		size,		/*!< in: compressed page size
+					(between UNIV_ZIP_SIZE_MIN and
+					UNIV_PAGE_SIZE) */
+	ibool*		lru)		/*!< in: pointer to a variable
+					that is assigned TRUE if storage came
+					from the LRU list and buf_pool->mutex
+					was temporarily released */
+{
+	void*	block;
+
+	ut_ad(buf_pool_mutex_own(buf_pool));
+	ut_ad(ut_is_2pow(size));
+	ut_ad(size >= UNIV_ZIP_SIZE_MIN);
+	ut_ad(size <= UNIV_PAGE_SIZE);
+
+	block = buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru);
+	return((byte*) block);
+}
+
+/**********************************************************************//**
+Deallocate a block: forwards to buf_buddy_free_low() with the size's slot. */
+UNIV_INLINE
+void
+buf_buddy_free(
+/*===========*/
+	buf_pool_t*	buf_pool,	/*!< in/out: pool owning the block */
+	void*		buf,		/*!< in: block to be freed, must not
+					be pointed to by the buffer pool */
+	ulint		size)		/*!< in: block size <= UNIV_PAGE_SIZE */
+{
+	ulint	slot;
+
+	ut_ad(buf_pool_mutex_own(buf_pool));
+	ut_ad(ut_is_2pow(size));
+	ut_ad(size >= UNIV_ZIP_SIZE_MIN);
+	ut_ad(size <= UNIV_PAGE_SIZE);
+	slot = buf_buddy_get_slot(size);
+	buf_buddy_free_low(buf_pool, buf, slot);
+}
+
+#ifdef UNIV_MATERIALIZE
+# undef UNIV_INLINE
+# define UNIV_INLINE	UNIV_INLINE_ORIGINAL
+#endif
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
new file mode 100644
index 00000000000..b669bd203e0
--- /dev/null
+++ b/storage/innobase/include/buf0buf.h
@@ -0,0 +1,2179 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0buf.h
+The database buffer pool high-level routines
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef buf0buf_h
+#define buf0buf_h
+
+#include "univ.i"
+#include "fil0fil.h"
+#include "mtr0types.h"
+#include "buf0types.h"
+#include "hash0hash.h"
+#include "ut0byte.h"
+#include "page0types.h"
+#ifndef UNIV_HOTBACKUP
+#include "ut0rbt.h"
+#include "os0proc.h"
+#include "log0log.h"
+
+/** @name Modes for buf_page_get_gen */
+/* @{ */
+#define BUF_GET			10	/*!< get always */
+#define	BUF_GET_IF_IN_POOL	11	/*!< get if in pool */
+#define BUF_PEEK_IF_IN_POOL	12	/*!< get if in pool, do not make
+					the block young in the LRU list */
+#define BUF_GET_NO_LATCH	14	/*!< get and bufferfix, but
+					set no latch; we have
+					separated this case, because
+					it is error-prone programming
+					not to set a latch, and it
+					should be used with care */
+#define BUF_GET_IF_IN_POOL_OR_WATCH	15
+					/*!< Get the page only if it's in the
+					buffer pool, if not then set a watch
+					on the page. */
+#define BUF_GET_POSSIBLY_FREED		16
+					/*!< Like BUF_GET, but do not mind
+					if the file page has been freed. */
+/* @} */
+/** @name Modes for buf_page_get_known_nowait */
+/* @{ */
+#define BUF_MAKE_YOUNG	51		/*!< Move the block to the
+					start of the LRU list if there
+					is a danger that the block
+					would drift out of the buffer
+					pool*/
+#define BUF_KEEP_OLD	52		/*!< Preserve the current LRU
+					position of the block. */
+/* @} */
+
+#define MAX_BUFFER_POOLS_BITS	6	/*!< Number of bits used to represent
+					a buffer pool ID */
+
+#define MAX_BUFFER_POOLS 	(1 << MAX_BUFFER_POOLS_BITS)
+					/*!< The maximum number of buffer
+					pools that can be defined */
+
+#define BUF_POOL_WATCH_SIZE		(srv_n_purge_threads + 1)
+					/*!< Maximum number of concurrent
+					buffer pool watches */
+#define MAX_PAGE_HASH_LOCKS	1024	/*!< The maximum number of
+					page_hash locks */
+
+extern	buf_pool_t*	buf_pool_ptr;	/*!< The buffer pools
+					of the database */
+#ifdef UNIV_DEBUG
+extern ibool		buf_debug_prints;/*!< If this is set TRUE, the program
+					prints info whenever read or flush
+					occurs */
+#endif /* UNIV_DEBUG */
+extern ulint srv_buf_pool_instances;
+extern ulint srv_buf_pool_curr_size;
+#else /* !UNIV_HOTBACKUP */
+extern buf_block_t*	back_block1;	/*!< first block, for --apply-log */
+extern buf_block_t*	back_block2;	/*!< second block, for page reorganize */
+#endif /* !UNIV_HOTBACKUP */
+
+/** Magic value to use instead of checksums when they are disabled */
+#define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
+
+/** @brief States of a control block
+@see buf_page_t
+
+The enumeration values must be 0..7; they are spelled out explicitly. */
+enum buf_page_state {
+	BUF_BLOCK_POOL_WATCH = 0,	/*!< sentinel for the buffer pool
+					watch, element of buf_pool->watch[] */
+	BUF_BLOCK_ZIP_PAGE = 1,		/*!< holds a clean
+					compressed page */
+	BUF_BLOCK_ZIP_DIRTY = 2,	/*!< holds a compressed
+					page that is in the
+					buf_pool->flush_list */
+
+	BUF_BLOCK_NOT_USED = 3,		/*!< is in the free list;
+					must be after the BUF_BLOCK_ZIP_
+					constants for compressed-only pages
+					@see buf_block_state_valid() */
+	BUF_BLOCK_READY_FOR_USE = 4,	/*!< state of a block returned by
+					buf_LRU_get_free_block */
+	BUF_BLOCK_FILE_PAGE = 5,	/*!< holds a buffered file page */
+	BUF_BLOCK_MEMORY = 6,		/*!< holds some main memory
+					object */
+	BUF_BLOCK_REMOVE_HASH = 7	/*!< hash index should be removed
+					before putting to the free list */
+};
+
+
+/** This structure defines information we will fetch from each buffer pool. It
+will be used to print table IO stats */
+struct buf_pool_info_t{
+	/* General buffer pool info */
+	ulint	pool_unique_id;		/*!< Buffer Pool ID */
+	ulint	pool_size;		/*!< Buffer Pool size in pages */
+	ulint	lru_len;		/*!< Length of buf_pool->LRU */
+	ulint	old_lru_len;		/*!< buf_pool->LRU_old_len */
+	ulint	free_list_len;		/*!< Length of buf_pool->free list */
+	ulint	flush_list_len;		/*!< Length of buf_pool->flush_list */
+	ulint	n_pend_unzip;		/*!< buf_pool->n_pend_unzip, pages
+					pending decompress */
+	ulint	n_pend_reads;		/*!< buf_pool->n_pend_reads, pages
+					pending read */
+	ulint	n_pending_flush_lru;	/*!< Pages pending flush in LRU */
+	ulint	n_pending_flush_single_page;/*!< Pages pending to be
+					flushed as part of single page
+					flushes issued by various user
+					threads */
+	ulint	n_pending_flush_list;	/*!< Pages pending flush in FLUSH
+					LIST */
+	ulint	n_pages_made_young;	/*!< number of pages made young */
+	ulint	n_pages_not_made_young;	/*!< number of pages not made young */
+	ulint	n_pages_read;		/*!< buf_pool->n_pages_read */
+	ulint	n_pages_created;	/*!< buf_pool->n_pages_created */
+	ulint	n_pages_written;	/*!< buf_pool->n_pages_written */
+	ulint	n_page_gets;		/*!< buf_pool->n_page_gets */
+	ulint	n_ra_pages_read_rnd;	/*!< buf_pool->n_ra_pages_read_rnd,
+					number of pages readahead */
+	ulint	n_ra_pages_read;	/*!< buf_pool->n_ra_pages_read, number
+					of pages readahead */
+	ulint	n_ra_pages_evicted;	/*!< buf_pool->n_ra_pages_evicted,
+					number of readahead pages evicted
+					without access */
+	ulint	n_page_get_delta;	/*!< num of buffer pool page gets since
+					last printout */
+
+	/* Buffer pool access stats */
+	double	page_made_young_rate;	/*!< page made young rate in pages
+					per second */
+	double	page_not_made_young_rate;/*!< page not made young rate
+					in pages per second */
+	double	pages_read_rate;	/*!< num of pages read per second */
+	double	pages_created_rate;	/*!< num of pages created per second */
+	double	pages_written_rate;	/*!< num of pages written per second */
+	ulint	page_read_delta;	/*!< num of pages read since last
+					printout */
+	ulint	young_making_delta;	/*!< num of pages made young since
+					last printout */
+	ulint	not_young_making_delta;	/*!< num of pages not made young since
+					last printout */
+
+	/* Statistics about read ahead algorithm.  */
+	double	pages_readahead_rnd_rate;/*!< random readahead rate in pages per
+					second */
+	double	pages_readahead_rate;	/*!< readahead rate in pages per
+					second */
+	double	pages_evicted_rate;	/*!< rate of readahead page evicted
+					without access, in pages per second */
+
+	/* Stats about LRU eviction */
+	ulint	unzip_lru_len;		/*!< length of buf_pool->unzip_LRU
+					list */
+	/* Counters for LRU policy */
+	ulint	io_sum;			/*!< buf_LRU_stat_sum.io */
+	ulint	io_cur;			/*!< buf_LRU_stat_cur.io, num of IO
+					for current interval */
+	ulint	unzip_sum;		/*!< buf_LRU_stat_sum.unzip */
+	ulint	unzip_cur;		/*!< buf_LRU_stat_cur.unzip, num
+					pages decompressed in current
+					interval */
+};
+
+/** The occupied bytes of lists in all buffer pools */
+struct buf_pools_list_size_t {
+	ulint	LRU_bytes;		/*!< LRU size in bytes */
+	ulint	unzip_LRU_bytes;	/*!< unzip_LRU size in bytes */
+	ulint	flush_list_bytes;	/*!< flush_list size in bytes */
+};
+
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+Acquire mutex on all buffer pool instances */
+UNIV_INLINE
+void
+buf_pool_mutex_enter_all(void);
+/*===========================*/
+
+/********************************************************************//**
+Release mutex on all buffer pool instances */
+UNIV_INLINE
+void
+buf_pool_mutex_exit_all(void);
+/*==========================*/
+
+/********************************************************************//**
+Creates the buffer pool.
+@return	DB_SUCCESS if success, DB_ERROR if not enough memory or error */
+UNIV_INTERN
+dberr_t
+buf_pool_init(
+/*=========*/
+	ulint	size,		/*!< in: Size of the total pool in bytes */
+	ulint	n_instances);	/*!< in: Number of instances */
+/********************************************************************//**
+Frees the buffer pool at shutdown.  This must not be invoked before
+freeing all mutexes. */
+UNIV_INTERN
+void
+buf_pool_free(
+/*==========*/
+	ulint	n_instances);	/*!< in: number of instances to free */
+
+/********************************************************************//**
+Clears the adaptive hash index on all pages in the buffer pool. */
+UNIV_INTERN
+void
+buf_pool_clear_hash_index(void);
+/*===========================*/
+
+/********************************************************************//**
+Relocate a buffer control block.  Relocates the block on the LRU list
+and in buf_pool->page_hash.  Does not relocate bpage->list.
+The caller must take care of relocating bpage->list. */
+UNIV_INTERN
+void
+buf_relocate(
+/*=========*/
+	buf_page_t*	bpage,	/*!< in/out: control block being relocated;
+				buf_page_get_state(bpage) must be
+				BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
+	buf_page_t*	dpage)	/*!< in/out: destination control block */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Gets the current size of buffer buf_pool in bytes.
+@return	size in bytes */
+UNIV_INLINE
+ulint
+buf_pool_get_curr_size(void);
+/*========================*/
+/*********************************************************************//**
+Gets the current size of buffer buf_pool in frames.
+@return	size in pages */
+UNIV_INLINE
+ulint
+buf_pool_get_n_pages(void);
+/*=======================*/
+/********************************************************************//**
+Gets the smallest oldest_modification lsn for any page in the pool. Returns
+zero if all modified pages have been flushed to disk.
+@return	oldest modification in pool, zero if none */
+UNIV_INTERN
+lsn_t
+buf_pool_get_oldest_modification(void);
+/*==================================*/
+
+/********************************************************************//**
+Allocates a buf_page_t descriptor. This function must succeed. In case
+of failure we assert in this function. */
+UNIV_INLINE
+buf_page_t*
+buf_page_alloc_descriptor(void)
+/*===========================*/
+	__attribute__((malloc));
+/********************************************************************//**
+Free a buf_page_t descriptor. */
+UNIV_INLINE
+void
+buf_page_free_descriptor(
+/*=====================*/
+	buf_page_t*	bpage)	/*!< in: bpage descriptor to free. */
+	__attribute__((nonnull));
+
+/********************************************************************//**
+Allocates a buffer block.
+@return	own: the allocated block, in state BUF_BLOCK_MEMORY */
+UNIV_INTERN
+buf_block_t*
+buf_block_alloc(
+/*============*/
+	buf_pool_t*	buf_pool);	/*!< in: buffer pool instance,
+					or NULL for round-robin selection
+					of the buffer pool */
+/********************************************************************//**
+Frees a buffer block which does not contain a file page. */
+UNIV_INLINE
+void
+buf_block_free(
+/*===========*/
+	buf_block_t*	block);	/*!< in, own: block to be freed */
+#endif /* !UNIV_HOTBACKUP */
+/*********************************************************************//**
+Copies contents of a buffer frame to a given buffer.
+@return	buf */
+UNIV_INLINE
+byte*
+buf_frame_copy(
+/*===========*/
+	byte*			buf,	/*!< in: buffer to copy to */
+	const buf_frame_t*	frame);	/*!< in: buffer frame */
+#ifndef UNIV_HOTBACKUP
+/**************************************************************//**
+NOTE! The following macros should be used instead of buf_page_get_gen,
+to improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed
+in LA! */
+#define buf_page_get(SP, ZS, OF, LA, MTR)	 buf_page_get_gen(\
+				SP, ZS, OF, LA, NULL,\
+				BUF_GET, __FILE__, __LINE__, MTR)
+/**************************************************************//**
+Use these macros to bufferfix a page with no latching. Remember not to
+read the contents of the page unless you know it is safe. Do not modify
+the contents of the page! We have separated this case, because it is
+error-prone programming not to set a latch, and it should be used
+with care. */
+#define buf_page_get_with_no_latch(SP, ZS, OF, MTR)	   buf_page_get_gen(\
+				SP, ZS, OF, RW_NO_LATCH, NULL,\
+				BUF_GET_NO_LATCH, __FILE__, __LINE__, MTR)
+/********************************************************************//**
+This is the general function used to get optimistic access to a database
+page.
+@return	TRUE if success */
+UNIV_INTERN
+ibool
+buf_page_optimistic_get(
+/*====================*/
+	ulint		rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
+	buf_block_t*	block,	/*!< in: guessed block */
+	ib_uint64_t	modify_clock,/*!< in: modify clock value */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
+	mtr_t*		mtr);	/*!< in: mini-transaction */
+/********************************************************************//**
+This is used to get access to a known database page, when no waiting can be
+done.
+@return	TRUE if success */
+UNIV_INTERN
+ibool
+buf_page_get_known_nowait(
+/*======================*/
+	ulint		rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
+	buf_block_t*	block,	/*!< in: the known page */
+	ulint		mode,	/*!< in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
+	mtr_t*		mtr);	/*!< in: mini-transaction */
+
+/*******************************************************************//**
+Given a tablespace id and page number tries to get that page. If the
+page is not in the buffer pool it is not loaded and NULL is returned.
+Suitable for using when holding the lock_sys_t::mutex. */
+UNIV_INTERN
+const buf_block_t*
+buf_page_try_get_func(
+/*==================*/
+	ulint		space_id,/*!< in: tablespace id */
+	ulint		page_no,/*!< in: page number */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
+	mtr_t*		mtr);	/*!< in: mini-transaction */
+
+/** Tries to get a page without loading it if it is not in the buffer pool.
+Suitable for use under lock_sys_t::mutex.  Expands to an expression only.
+@param space_id	in: tablespace id
+@param page_no	in: page number
+@param mtr	in: mini-transaction
+@return		the page if in buffer pool, NULL if not */
+#define buf_page_try_get(space_id, page_no, mtr)	\
+	buf_page_try_get_func(space_id, page_no, __FILE__, __LINE__, mtr)
+
+/********************************************************************//**
+Get read access to a compressed page (usually of type
+FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
+The page must be released with buf_page_release_zip().
+NOTE: the page is not protected by any latch.  Mutual exclusion has to
+be implemented at a higher level.  In other words, all possible
+accesses to a given page through this function must be protected by
+the same set of mutexes or latches.
+@return	pointer to the block, or NULL if not compressed */
+UNIV_INTERN
+buf_page_t*
+buf_page_get_zip(
+/*=============*/
+	ulint		space,	/*!< in: space id */
+	ulint		zip_size,/*!< in: compressed page size */
+	ulint		offset);/*!< in: page number */
+/********************************************************************//**
+This is the general function used to get access to a database page.
+@return	pointer to the block or NULL */
+UNIV_INTERN
+buf_block_t*
+buf_page_get_gen(
+/*=============*/
+	ulint		space,	/*!< in: space id */
+	ulint		zip_size,/*!< in: zip size in bytes, 0=uncompressed */
+	ulint		offset,	/*!< in: page number */
+	ulint		rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
+	buf_block_t*	guess,	/*!< in: guessed block or NULL */
+	ulint		mode,	/*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
+				BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH,
+				BUF_GET_IF_IN_POOL_OR_WATCH or
+				BUF_GET_POSSIBLY_FREED */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
+	mtr_t*		mtr);	/*!< in: mini-transaction */
+/********************************************************************//**
+Initializes a page to the buffer buf_pool. The page is usually not read
+from a file even if it cannot be found in the buffer buf_pool. This is one
+of the functions which perform a state transition NOT_USED => FILE_PAGE
+on a block (the other is buf_page_get_gen).
+@return	pointer to the block, page bufferfixed */
+UNIV_INTERN
+buf_block_t*
+buf_page_create(
+/*============*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset,	/*!< in: offset of the page within space in units of
+			a page */
+	ulint	zip_size,/*!< in: compressed page size, or 0 */
+	mtr_t*	mtr);	/*!< in: mini-transaction handle */
+#else /* !UNIV_HOTBACKUP */
+/********************************************************************//**
+Inits a page to the buffer buf_pool, for use in mysqlbackup --restore. */
+UNIV_INTERN
+void
+buf_page_init_for_backup_restore(
+/*=============================*/
+	ulint		space,	/*!< in: space id */
+	ulint		offset,	/*!< in: offset of the page within space
+				in units of a page */
+	ulint		zip_size,/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	buf_block_t*	block);	/*!< in: block to init */
+#endif /* !UNIV_HOTBACKUP */
+
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+Releases a compressed-only page acquired with buf_page_get_zip(). */
+UNIV_INLINE
+void
+buf_page_release_zip(
+/*=================*/
+	buf_page_t*	bpage);		/*!< in: buffer block */
+/********************************************************************//**
+Decrements the bufferfix count of a buffer control block and releases
+a latch, if specified. */
+UNIV_INLINE
+void
+buf_page_release(
+/*=============*/
+	buf_block_t*	block,		/*!< in: buffer block */
+	ulint		rw_latch);	/*!< in: RW_S_LATCH, RW_X_LATCH,
+					RW_NO_LATCH */
+/********************************************************************//**
+Moves a page to the start of the buffer pool LRU list. This high-level
+function can be used to prevent an important page from slipping out of
+the buffer pool. */
+UNIV_INTERN
+void
+buf_page_make_young(
+/*================*/
+	buf_page_t*	bpage);	/*!< in: buffer block of a file page */
+/********************************************************************//**
+Returns TRUE if the page can be found in the buffer pool hash table.
+
+NOTE that it is possible that the page is not yet read from disk,
+though.
+
+@return	TRUE if found in the page hash table */
+UNIV_INLINE
+ibool
+buf_page_peek(
+/*==========*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset);/*!< in: page number */
+#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
+/********************************************************************//**
+Sets file_page_was_freed TRUE if the page is found in the buffer pool.
+This function should be called when we free a file page and want the
+debug version to check that it is not accessed any more unless
+reallocated.
+@return	control block if found in page hash table, otherwise NULL */
+UNIV_INTERN
+buf_page_t*
+buf_page_set_file_page_was_freed(
+/*=============================*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset);/*!< in: page number */
+/********************************************************************//**
+Sets file_page_was_freed FALSE if the page is found in the buffer pool.
+This function should be called when we free a file page and want the
+debug version to check that it is not accessed any more unless
+reallocated.
+@return	control block if found in page hash table, otherwise NULL */
+UNIV_INTERN
+buf_page_t*
+buf_page_reset_file_page_was_freed(
+/*===============================*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset);	/*!< in: page number */
+#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
+/********************************************************************//**
+Reads the freed_page_clock of a buffer block.
+@return	freed_page_clock */
+UNIV_INLINE
+ulint
+buf_page_get_freed_page_clock(
+/*==========================*/
+	const buf_page_t*	bpage)	/*!< in: block */
+	__attribute__((pure));
+/********************************************************************//**
+Reads the freed_page_clock of a buffer block.
+@return	freed_page_clock */
+UNIV_INLINE
+ulint
+buf_block_get_freed_page_clock(
+/*===========================*/
+	const buf_block_t*	block)	/*!< in: block */
+	__attribute__((pure));
+
+/********************************************************************//**
+Tells if a block is still close enough to the MRU end of the LRU list
+meaning that it is not in danger of getting evicted and also implying
+that it has been accessed recently.
+Note that this is for heuristics only and does not reserve buffer pool
+mutex.
+@return	TRUE if block is close to MRU end of LRU */
+UNIV_INLINE
+ibool
+buf_page_peek_if_young(
+/*===================*/
+	const buf_page_t*	bpage);	/*!< in: block */
+/********************************************************************//**
+Recommends a move of a block to the start of the LRU list if there is danger
+of dropping from the buffer pool. NOTE: does not reserve the buffer pool
+mutex.
+@return	TRUE if should be made younger */
+UNIV_INLINE
+ibool
+buf_page_peek_if_too_old(
+/*=====================*/
+	const buf_page_t*	bpage);	/*!< in: block to make younger */
+/********************************************************************//**
+Gets the youngest modification log sequence number for a frame.
+Returns zero if not file page or no modification occurred yet.
+@return	newest modification to page */
+UNIV_INLINE
+lsn_t
+buf_page_get_newest_modification(
+/*=============================*/
+	const buf_page_t*	bpage);	/*!< in: block containing the
+					page frame */
+/********************************************************************//**
+Increments the modify clock of a frame by 1. The caller must (1) own the
+buf_pool->mutex and block bufferfix count has to be zero, (2) or own an x-lock
+on the block. */
+UNIV_INLINE
+void
+buf_block_modify_clock_inc(
+/*=======================*/
+	buf_block_t*	block);	/*!< in: block */
+/********************************************************************//**
+Returns the value of the modify clock. The caller must have an s-lock
+or x-lock on the block.
+@return	value */
+UNIV_INLINE
+ib_uint64_t
+buf_block_get_modify_clock(
+/*=======================*/
+	buf_block_t*	block);	/*!< in: block */
+/*******************************************************************//**
+Increments the bufferfix count. */
+UNIV_INLINE
+void
+buf_block_buf_fix_inc_func(
+/*=======================*/
+# ifdef UNIV_SYNC_DEBUG
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line */
+# endif /* UNIV_SYNC_DEBUG */
+	buf_block_t*	block)	/*!< in/out: block to bufferfix */
+	__attribute__((nonnull));
+
+/*******************************************************************//**
+Increments the bufferfix count. */
+UNIV_INLINE
+void
+buf_block_fix(
+/*===========*/
+	buf_block_t*	block);	/*!< in/out: block to bufferfix */
+
+/*******************************************************************//**
+Decrements the bufferfix count. */
+UNIV_INLINE
+void
+buf_block_unfix(
+/*============*/
+	buf_block_t*	block);	/*!< in/out: block to bufferunfix */
+
+# ifdef UNIV_SYNC_DEBUG
+/** Increments the bufferfix count.
+@param b	in/out: block to bufferfix
+@param f	in: file name where requested
+@param l	in: line number where requested */
+# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(f,l,b)
+# else /* UNIV_SYNC_DEBUG */
+/** Increments the bufferfix count.
+@param b	in/out: block to bufferfix
+@param f	in: file name where requested
+@param l	in: line number where requested */
+# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(b)
+# endif /* UNIV_SYNC_DEBUG */
+#else /* !UNIV_HOTBACKUP */
+# define buf_block_modify_clock_inc(block) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
+/********************************************************************//**
+Checks if a page is corrupt.
+@return	TRUE if corrupted */
+UNIV_INTERN
+ibool
+buf_page_is_corrupted(
+/*==================*/
+	bool		check_lsn,	/*!< in: true if we need to check
+					and complain about the LSN */
+	const byte*	read_buf,	/*!< in: a database page */
+	ulint		zip_size)	/*!< in: size of compressed page;
+					0 for uncompressed pages */
+	__attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Checks if a page is all zeroes.
+@return	true if the page is all zeroes */
+bool
+buf_page_is_zeroes(
+/*===============*/
+	const byte*	read_buf,	/*!< in: a database page */
+	const ulint	zip_size);	/*!< in: size of compressed page;
+					0 for uncompressed pages */
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Gets the space id, page offset, and byte offset within page of a
+pointer pointing to a buffer frame containing a file page. */
+UNIV_INLINE
+void
+buf_ptr_get_fsp_addr(
+/*=================*/
+	const void*	ptr,	/*!< in: pointer to a buffer frame */
+	ulint*		space,	/*!< out: space id */
+	fil_addr_t*	addr);	/*!< out: page offset and byte offset */
+/**********************************************************************//**
+Gets the hash value of a block. This can be used in searches in the
+lock hash table.
+@return	lock hash value */
+UNIV_INLINE
+ulint
+buf_block_get_lock_hash_val(
+/*========================*/
+	const buf_block_t*	block)	/*!< in: block */
+	__attribute__((pure));
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Finds a block in the buffer pool that points to a
+given compressed page.
+@return	buffer block pointing to the compressed page, or NULL */
+UNIV_INTERN
+buf_block_t*
+buf_pool_contains_zip(
+/*==================*/
+	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
+	const void*	data);		/*!< in: pointer to compressed page */
+#endif /* UNIV_DEBUG */
+
+/**********************************************************************//**
+FIXME_FTS: Gets the frame the pointer is pointing to.
+@return	pointer to frame */
+UNIV_INLINE
+buf_frame_t*
+buf_frame_align(
+/*============*/
+        byte*   ptr);   /*!< in: pointer to a frame */
+
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/*********************************************************************//**
+Validates the buffer pool data structure.
+@return	TRUE */
+UNIV_INTERN
+ibool
+buf_validate(void);
+/*==============*/
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/*********************************************************************//**
+Prints info of the buffer pool data structure. */
+UNIV_INTERN
+void
+buf_print(void);
+/*============*/
+#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
+#endif /* !UNIV_HOTBACKUP */
+enum buf_page_print_flags {
+	/** Do not crash at the end of buf_page_print(). */
+	BUF_PAGE_PRINT_NO_CRASH	= 1,
+	/** Do not print the full page dump. */
+	BUF_PAGE_PRINT_NO_FULL = 2
+};
+
+/********************************************************************//**
+Prints a page to stderr. */
+UNIV_INTERN
+void
+buf_page_print(
+/*===========*/
+	const byte*	read_buf,	/*!< in: a database page */
+	ulint		zip_size,	/*!< in: compressed page size, or
+					0 for uncompressed pages */
+	ulint		flags)		/*!< in: 0 or
+					BUF_PAGE_PRINT_NO_CRASH or
+					BUF_PAGE_PRINT_NO_FULL */
+	UNIV_COLD __attribute__((nonnull));
+/********************************************************************//**
+Decompress a block.
+@return	TRUE if successful */
+UNIV_INTERN
+ibool
+buf_zip_decompress(
+/*===============*/
+	buf_block_t*	block,	/*!< in/out: block */
+	ibool		check);	/*!< in: TRUE=verify the page checksum */
+#ifndef UNIV_HOTBACKUP
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Returns the number of latched pages in the buffer pool.
+@return	number of latched pages */
+UNIV_INTERN
+ulint
+buf_get_latched_pages_number(void);
+/*==============================*/
+#endif /* UNIV_DEBUG */
+/*********************************************************************//**
+Returns the number of pending buf pool read ios.
+@return	number of pending read I/O operations */
+UNIV_INTERN
+ulint
+buf_get_n_pending_read_ios(void);
+/*============================*/
+/*********************************************************************//**
+Prints info of the buffer i/o. */
+UNIV_INTERN
+void
+buf_print_io(
+/*=========*/
+	FILE*	file);	/*!< in: file where to print */
+/*******************************************************************//**
+Collect buffer pool stats information for a buffer pool. Also
+record aggregated stats if there are more than one buffer pool
+in the server */
+UNIV_INTERN
+void
+buf_stats_get_pool_info(
+/*====================*/
+	buf_pool_t*		buf_pool,	/*!< in: buffer pool */
+	ulint			pool_id,	/*!< in: buffer pool ID */
+	buf_pool_info_t*	all_pool_info);	/*!< in/out: buffer pool info
+						to fill */
+/*********************************************************************//**
+Returns the ratio in percents of modified pages in the buffer pool /
+database pages in the buffer pool.
+@return	modified page percentage ratio */
+UNIV_INTERN
+ulint
+buf_get_modified_ratio_pct(void);
+/*============================*/
+/**********************************************************************//**
+Refreshes the statistics used to print per-second averages. */
+UNIV_INTERN
+void
+buf_refresh_io_stats(
+/*=================*/
+	buf_pool_t*	buf_pool);	/*!< buffer pool instance */
+/**********************************************************************//**
+Refreshes the statistics used to print per-second averages. */
+UNIV_INTERN
+void
+buf_refresh_io_stats_all(void);
+/*=================*/
+/*********************************************************************//**
+Asserts that all file pages in the buffer are in a replaceable state.
+@return	TRUE */
+UNIV_INTERN
+ibool
+buf_all_freed(void);
+/*===============*/
+/*********************************************************************//**
+Checks that there currently are no pending i/o-operations for the buffer
+pool.
+@return	number of pending i/o operations */
+UNIV_INTERN
+ulint
+buf_pool_check_no_pending_io(void);
+/*==============================*/
+/*********************************************************************//**
+Invalidates the file pages in the buffer pool when an archive recovery is
+completed. All the file pages buffered must be in a replaceable state when
+this function is called: not latched and not modified. */
+UNIV_INTERN
+void
+buf_pool_invalidate(void);
+/*=====================*/
+#endif /* !UNIV_HOTBACKUP */
+
+/*========================================================================
+--------------------------- LOWER LEVEL ROUTINES -------------------------
+=========================================================================*/
+
+#ifdef UNIV_SYNC_DEBUG
+/*********************************************************************//**
+Adds latch level info for the rw-lock protecting the buffer frame. This
+should be called in the debug version after a successful latching of a
+page if we know the latching order level of the acquired latch. */
+UNIV_INLINE
+void
+buf_block_dbg_add_level(
+/*====================*/
+	buf_block_t*	block,	/*!< in: buffer page
+				where we have acquired latch */
+	ulint		level);	/*!< in: latching order level */
+#else /* UNIV_SYNC_DEBUG */
+# define buf_block_dbg_add_level(block, level) /* nothing */
+#endif /* UNIV_SYNC_DEBUG */
+/*********************************************************************//**
+Gets the state of a block.
+@return	state */
+UNIV_INLINE
+enum buf_page_state
+buf_page_get_state(
+/*===============*/
+	const buf_page_t*	bpage);	/*!< in: pointer to the control block */
+/*********************************************************************//**
+Gets the state of a block.
+@return	state */
+UNIV_INLINE
+enum buf_page_state
+buf_block_get_state(
+/*================*/
+	const buf_block_t*	block)	/*!< in: pointer to the control block */
+	__attribute__((pure));
+/*********************************************************************//**
+Sets the state of a block. */
+UNIV_INLINE
+void
+buf_page_set_state(
+/*===============*/
+	buf_page_t*		bpage,	/*!< in/out: pointer to control block */
+	enum buf_page_state	state);	/*!< in: state */
+/*********************************************************************//**
+Sets the state of a block. */
+UNIV_INLINE
+void
+buf_block_set_state(
+/*================*/
+	buf_block_t*		block,	/*!< in/out: pointer to control block */
+	enum buf_page_state	state);	/*!< in: state */
+/*********************************************************************//**
+Determines if a block is mapped to a tablespace.
+@return	TRUE if mapped */
+UNIV_INLINE
+ibool
+buf_page_in_file(
+/*=============*/
+	const buf_page_t*	bpage)	/*!< in: pointer to control block */
+	__attribute__((pure));
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+Determines if a block should be on unzip_LRU list.
+@return	TRUE if block belongs to unzip_LRU */
+UNIV_INLINE
+ibool
+buf_page_belongs_to_unzip_LRU(
+/*==========================*/
+	const buf_page_t*	bpage)	/*!< in: pointer to control block */
+	__attribute__((pure));
+
+/*********************************************************************//**
+Gets the mutex of a block.
+@return	pointer to mutex protecting bpage */
+UNIV_INLINE
+ib_mutex_t*
+buf_page_get_mutex(
+/*===============*/
+	const buf_page_t*	bpage)	/*!< in: pointer to control block */
+	__attribute__((pure));
+
+/*********************************************************************//**
+Get the flush type of a page.
+@return	flush type */
+UNIV_INLINE
+buf_flush_t
+buf_page_get_flush_type(
+/*====================*/
+	const buf_page_t*	bpage)	/*!< in: buffer page */
+	__attribute__((pure));
+/*********************************************************************//**
+Set the flush type of a page. */
+UNIV_INLINE
+void
+buf_page_set_flush_type(
+/*====================*/
+	buf_page_t*	bpage,		/*!< in: buffer page */
+	buf_flush_t	flush_type);	/*!< in: flush type */
+/*********************************************************************//**
+Map a block to a file page. */
+UNIV_INLINE
+void
+buf_block_set_file_page(
+/*====================*/
+	buf_block_t*		block,	/*!< in/out: pointer to control block */
+	ulint			space,	/*!< in: tablespace id */
+	ulint			page_no);/*!< in: page number */
+/*********************************************************************//**
+Gets the io_fix state of a block.
+@return	io_fix state */
+UNIV_INLINE
+enum buf_io_fix
+buf_page_get_io_fix(
+/*================*/
+	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
+	__attribute__((pure));
+/*********************************************************************//**
+Gets the io_fix state of a block.
+@return	io_fix state */
+UNIV_INLINE
+enum buf_io_fix
+buf_block_get_io_fix(
+/*================*/
+	const buf_block_t*	block)	/*!< in: pointer to the control block */
+	__attribute__((pure));
+/*********************************************************************//**
+Sets the io_fix state of a block. */
+UNIV_INLINE
+void
+buf_page_set_io_fix(
+/*================*/
+	buf_page_t*	bpage,	/*!< in/out: control block */
+	enum buf_io_fix	io_fix);/*!< in: io_fix state */
+/*********************************************************************//**
+Sets the io_fix state of a block. */
+UNIV_INLINE
+void
+buf_block_set_io_fix(
+/*=================*/
+	buf_block_t*	block,	/*!< in/out: control block */
+	enum buf_io_fix	io_fix);/*!< in: io_fix state */
+/*********************************************************************//**
+Makes a block sticky. A sticky block implies that even after we release
+the buf_pool->mutex and the block->mutex:
+* it cannot be removed from the flush_list
+* the block descriptor cannot be relocated
+* it cannot be removed from the LRU list
+Note that:
+* the block can still change its position in the LRU list
+* the next and previous pointers can change. */
+UNIV_INLINE
+void
+buf_page_set_sticky(
+/*================*/
+	buf_page_t*	bpage);	/*!< in/out: control block */
+/*********************************************************************//**
+Removes stickiness of a block. */
+UNIV_INLINE
+void
+buf_page_unset_sticky(
+/*==================*/
+	buf_page_t*	bpage);	/*!< in/out: control block */
+/********************************************************************//**
+Determine if a buffer block can be relocated in memory.  The block
+can be dirty, but it must not be I/O-fixed or bufferfixed. */
+UNIV_INLINE
+ibool
+buf_page_can_relocate(
+/*==================*/
+	const buf_page_t*	bpage)	/*!< control block being relocated */
+	__attribute__((pure));
+
+/*********************************************************************//**
+Determine if a block has been flagged old.
+@return	TRUE if old */
+UNIV_INLINE
+ibool
+buf_page_is_old(
+/*============*/
+	const buf_page_t*	bpage)	/*!< in: control block */
+	__attribute__((pure));
+/*********************************************************************//**
+Flag a block old. */
+UNIV_INLINE
+void
+buf_page_set_old(
+/*=============*/
+	buf_page_t*	bpage,	/*!< in/out: control block */
+	ibool		old);	/*!< in: old */
+/*********************************************************************//**
+Determine the time of first access of a block in the buffer pool.
+@return	ut_time_ms() at the time of first access, 0 if not accessed */
+UNIV_INLINE
+unsigned
+buf_page_is_accessed(
+/*=================*/
+	const buf_page_t*	bpage)	/*!< in: control block */
+	__attribute__((nonnull, pure));
+/*********************************************************************//**
+Flag a block accessed. */
+UNIV_INLINE
+void
+buf_page_set_accessed(
+/*==================*/
+	buf_page_t*	bpage)		/*!< in/out: control block */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Gets the buf_block_t handle of a buffered file block if an uncompressed
+page frame exists, or NULL. Note: even though bpage is not declared a
+const we don't update its value. It is safe to make this pure.
+@return	control block, or NULL */
+UNIV_INLINE
+buf_block_t*
+buf_page_get_block(
+/*===============*/
+	buf_page_t*	bpage)	/*!< in: control block, or NULL */
+	__attribute__((pure));
+#endif /* !UNIV_HOTBACKUP */
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Gets a pointer to the memory frame of a block.
+@return	pointer to the frame */
+UNIV_INLINE
+buf_frame_t*
+buf_block_get_frame(
+/*================*/
+	const buf_block_t*	block)	/*!< in: pointer to the control block */
+	__attribute__((pure));
+#else /* UNIV_DEBUG */
+# define buf_block_get_frame(block) (block)->frame
+#endif /* UNIV_DEBUG */
+/*********************************************************************//**
+Gets the space id of a block.
+@return	space id */
+UNIV_INLINE
+ulint
+buf_page_get_space(
+/*===============*/
+	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
+	__attribute__((pure));
+/*********************************************************************//**
+Gets the space id of a block.
+@return	space id */
+UNIV_INLINE
+ulint
+buf_block_get_space(
+/*================*/
+	const buf_block_t*	block)	/*!< in: pointer to the control block */
+	__attribute__((pure));
+/*********************************************************************//**
+Gets the page number of a block.
+@return	page number */
+UNIV_INLINE
+ulint
+buf_page_get_page_no(
+/*=================*/
+	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
+	__attribute__((pure));
+/*********************************************************************//**
+Gets the page number of a block.
+@return	page number */
+UNIV_INLINE
+ulint
+buf_block_get_page_no(
+/*==================*/
+	const buf_block_t*	block)	/*!< in: pointer to the control block */
+	__attribute__((pure));
+/*********************************************************************//**
+Gets the compressed page size of a block.
+@return	compressed page size, or 0 */
+UNIV_INLINE
+ulint
+buf_page_get_zip_size(
+/*==================*/
+	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
+	__attribute__((pure));
+/*********************************************************************//**
+Gets the compressed page size of a block.
+@return	compressed page size, or 0 */
+UNIV_INLINE
+ulint
+buf_block_get_zip_size(
+/*===================*/
+	const buf_block_t*	block)	/*!< in: pointer to the control block */
+	__attribute__((pure));
+/*********************************************************************//**
+Gets the compressed page descriptor corresponding to an uncompressed page
+if applicable. */
+#define buf_block_get_page_zip(block) \
+	((block)->page.zip.data ? &(block)->page.zip : NULL)
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
+Gets the block to whose frame the pointer is pointing to.
+@return	pointer to block, never NULL */
+UNIV_INTERN
+buf_block_t*
+buf_block_align(
+/*============*/
+	const byte*	ptr);	/*!< in: pointer to a frame */
+/********************************************************************//**
+Find out if a pointer belongs to a buf_block_t. It can be a pointer to
+the buf_block_t itself or a member of it
+@return	TRUE if ptr belongs to a buf_block_t struct */
+UNIV_INTERN
+ibool
+buf_pointer_is_block_field(
+/*=======================*/
+	const void*		ptr);	/*!< in: pointer not
+					dereferenced */
+/** Find out if a pointer corresponds to a buf_block_t::mutex.
+@param m	in: mutex candidate
+@return		TRUE if m is a buf_block_t::mutex */
+#define buf_pool_is_block_mutex(m)			\
+	buf_pointer_is_block_field((const void*)(m))
+/** Find out if a pointer corresponds to a buf_block_t::lock.
+@param l	in: rw-lock candidate
+@return		TRUE if l is a buf_block_t::lock */
+#define buf_pool_is_block_lock(l)			\
+	buf_pointer_is_block_field((const void*)(l))
+
+#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
+/*********************************************************************//**
+Gets the compressed page descriptor corresponding to an uncompressed page
+if applicable.
+@return	compressed page descriptor, or NULL */
+UNIV_INLINE
+const page_zip_des_t*
+buf_frame_get_page_zip(
+/*===================*/
+	const byte*	ptr);	/*!< in: pointer to the page */
+#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
+/********************************************************************//**
+Function which inits a page for read to the buffer buf_pool. If the page is
+(1) already in buf_pool, or
+(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
+(3) if the space is deleted or being deleted,
+then this function does nothing.
+Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
+on the buffer frame. The io-handler must take care that the flag is cleared
+and the lock released later.
+@return	pointer to the block or NULL */
+UNIV_INTERN
+buf_page_t*
+buf_page_init_for_read(
+/*===================*/
+	dberr_t*	err,	/*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
+	ulint		mode,	/*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */
+	ulint		space,	/*!< in: space id */
+	ulint		zip_size,/*!< in: compressed page size, or 0 */
+	ibool		unzip,	/*!< in: TRUE=request uncompressed page */
+	ib_int64_t	tablespace_version,/*!< in: prevents reading from a wrong
+				version of the tablespace in case we have done
+				DISCARD + IMPORT */
+	ulint		offset);/*!< in: page number */
+/********************************************************************//**
+Completes an asynchronous read or write request of a file page to or from
+the buffer pool.
+@return true if successful */
+UNIV_INTERN
+bool
+buf_page_io_complete(
+/*=================*/
+	buf_page_t*	bpage);	/*!< in: pointer to the block in question */
+/********************************************************************//**
+Calculates a folded value of a file page address to use in the page hash
+table.
+@return	the folded value */
+UNIV_INLINE
+ulint
+buf_page_address_fold(
+/*==================*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset)	/*!< in: offset of the page within space */
+	__attribute__((const));
+/********************************************************************//**
+Calculates the index of a buffer pool to the buf_pool[] array.
+@return	the position of the buffer pool in buf_pool[] */
+UNIV_INLINE
+ulint
+buf_pool_index(
+/*===========*/
+	const buf_pool_t*	buf_pool)	/*!< in: buffer pool */
+	__attribute__((nonnull, const));
+/******************************************************************//**
+Returns the buffer pool instance given a page instance
+@return buf_pool */
+UNIV_INLINE
+buf_pool_t*
+buf_pool_from_bpage(
+/*================*/
+	const buf_page_t*	bpage); /*!< in: buffer pool page */
+/******************************************************************//**
+Returns the buffer pool instance given a block instance
+@return buf_pool */
+UNIV_INLINE
+buf_pool_t*
+buf_pool_from_block(
+/*================*/
+	const buf_block_t*	block); /*!< in: block */
+/******************************************************************//**
+Returns the buffer pool instance given space and offset of page
+@return buffer pool */
+UNIV_INLINE
+buf_pool_t*
+buf_pool_get(
+/*==========*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset);/*!< in: offset of the page within space */
+/******************************************************************//**
+Returns the buffer pool instance given its array index
+@return buffer pool */
+UNIV_INLINE
+buf_pool_t*
+buf_pool_from_array(
+/*================*/
+	ulint	index);		/*!< in: array index to get
+				buffer pool instance from */
+/******************************************************************//**
+Returns the control block of a file page, NULL if not found.
+@return	block, NULL if not found */
+UNIV_INLINE
+buf_page_t*
+buf_page_hash_get_low(
+/*==================*/
+	buf_pool_t*	buf_pool,/*!< buffer pool instance */
+	ulint		space,	/*!< in: space id */
+	ulint		offset,	/*!< in: offset of the page within space */
+	ulint		fold);	/*!< in: buf_page_address_fold(space, offset) */
+/******************************************************************//**
+Returns the control block of a file page, NULL if not found.
+If the block is found and lock is not NULL then the appropriate
+page_hash lock is acquired in the specified lock mode. Otherwise,
+mode value is ignored. It is up to the caller to release the
+lock. If the block is found and the lock is NULL then the page_hash
+lock is released by this function.
+@return	block, NULL if not found, or watch sentinel (if watch is true) */
+UNIV_INLINE
+buf_page_t*
+buf_page_hash_get_locked(
+/*=====================*/
+					/*!< out: pointer to the bpage,
+					or NULL; if NULL, hash_lock
+					is also NULL. */
+	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
+	ulint		space,		/*!< in: space id */
+	ulint		offset,		/*!< in: page number */
+	rw_lock_t**	lock,		/*!< in/out: lock of the page
+					hash acquired if bpage is
+					found. NULL otherwise. If NULL
+					is passed then the hash_lock
+					is released by this function */
+	ulint		lock_mode,	/*!< in: RW_LOCK_EX or
+					RW_LOCK_SHARED. Ignored if
+					lock == NULL */
+	bool		watch = false);	/*!< in: if true, return watch
+					sentinel also. */
+/******************************************************************//**
+Returns the control block of a file page, NULL if not found.
+If the block is found and lock is not NULL, the appropriate page_hash
+lock is acquired in lock_mode and handed back through *lock; releasing
+it is then up to the caller. If lock is NULL, lock_mode is ignored and
+the page_hash lock is released by this function. If the block is not
+found, *lock (when given) is set to NULL.
+@return	block, NULL if not found */
+UNIV_INLINE
+buf_block_t*
+buf_block_hash_get_locked(
+/*======================*/
+					/*!< out: pointer to the block,
+					or NULL; if NULL, hash_lock
+					is also NULL. */
+	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
+	ulint		space,		/*!< in: space id */
+	ulint		offset,		/*!< in: page number */
+	rw_lock_t**	lock,		/*!< in/out: lock of the page
+					hash acquired if block is
+					found. NULL otherwise. If NULL
+					is passed then the hash_lock
+					is released by this function */
+	ulint		lock_mode);	/*!< in: RW_LOCK_EX or
+					RW_LOCK_SHARED. Ignored if
+					lock == NULL */
+/* There are four different ways we can try to get a bpage or block
+from the page hash:
+1) Caller already holds the appropriate page hash lock: in that case call
+buf_page_hash_get_low() function.
+2) Caller wants to hold page hash lock in x-mode: *_hash_get_x_locked()
+3) Caller wants to hold page hash lock in s-mode: *_hash_get_s_locked()
+4) No page hash lock wanted: *_hash_get(), buf_page_get_also_watch() */
+#define buf_page_hash_get_s_locked(b, s, o, l)			\
+	buf_page_hash_get_locked(b, s, o, l, RW_LOCK_SHARED)
+#define buf_page_hash_get_x_locked(b, s, o, l)			\
+	buf_page_hash_get_locked(b, s, o, l, RW_LOCK_EX)
+#define buf_page_hash_get(b, s, o)				\
+	buf_page_hash_get_locked(b, s, o, NULL, 0)
+#define buf_page_get_also_watch(b, s, o)			\
+	buf_page_hash_get_locked(b, s, o, NULL, 0, true)
+
+#define buf_block_hash_get_s_locked(b, s, o, l)			\
+	buf_block_hash_get_locked(b, s, o, l, RW_LOCK_SHARED)
+#define buf_block_hash_get_x_locked(b, s, o, l)			\
+	buf_block_hash_get_locked(b, s, o, l, RW_LOCK_EX)
+#define buf_block_hash_get(b, s, o)				\
+	buf_block_hash_get_locked(b, s, o, NULL, 0)
+
+/*********************************************************************//**
+Gets the current length of the free list of buffer blocks.
+@return	length of the free list, in blocks */
+UNIV_INTERN
+ulint
+buf_get_free_list_len(void);
+/*=======================*/
+
+/********************************************************************//**
+Determine if a block is a sentinel for a buffer pool watch.
+@return	TRUE if a sentinel for a buffer pool watch, FALSE if not */
+UNIV_INTERN
+ibool
+buf_pool_watch_is_sentinel(
+/*=======================*/
+	buf_pool_t*		buf_pool,	/*!< in: buffer pool instance */
+	const buf_page_t*	bpage)		/*!< in: block to test */
+	__attribute__((nonnull, warn_unused_result));
+/****************************************************************//**
+Add watch for the given page to be read in. TODO: state the lock the caller holds.
+@return NULL if watch set, block if the page is in the buffer pool */
+UNIV_INTERN
+buf_page_t*
+buf_pool_watch_set(
+/*===============*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset,	/*!< in: page number */
+	ulint	fold)	/*!< in: buf_page_address_fold(space, offset) */
+	__attribute__((warn_unused_result));
+/****************************************************************//**
+Stop watching whether the page has been read in.
+buf_pool_watch_set(space,offset) must have returned NULL before. */
+UNIV_INTERN
+void
+buf_pool_watch_unset(
+/*=================*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset);/*!< in: page number */
+/****************************************************************//**
+Check whether the watched page has been read in.
+This may only be called after buf_pool_watch_set(space,offset)
+has returned NULL and before invoking buf_pool_watch_unset(space,offset).
+@return	FALSE if the given page was not read in, TRUE if it was */
+UNIV_INTERN
+ibool
+buf_pool_watch_occurred(
+/*====================*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset)	/*!< in: page number */
+	__attribute__((warn_unused_result));
+/********************************************************************//**
+Get the total lengths of all LRU, free and flush lists. */
+UNIV_INTERN
+void
+buf_get_total_list_len(
+/*===================*/
+	ulint*		LRU_len,	/*!< out: length of all LRU lists */
+	ulint*		free_len,	/*!< out: length of all free lists */
+	ulint*		flush_list_len);/*!< out: length of all flush lists */
+/********************************************************************//**
+Get the total list sizes, in bytes, from all buffer pools. */
+UNIV_INTERN
+void
+buf_get_total_list_size_in_bytes(
+/*=============================*/
+	buf_pools_list_size_t*	buf_pools_list_size);	/*!< out: list sizes
+							in all buffer pools */
+/********************************************************************//**
+Get total buffer pool statistics (see struct buf_pool_stat_t). */
+UNIV_INTERN
+void
+buf_get_total_stat(
+/*===============*/
+	buf_pool_stat_t*tot_stat);	/*!< out: buffer pool stats */
+/*********************************************************************//**
+Get the nth chunk's buffer block in the specified buffer pool.
+@return the nth chunk's buffer block. */
+UNIV_INLINE
+buf_block_t*
+buf_get_nth_chunk_block(
+/*====================*/
+	const buf_pool_t* buf_pool,	/*!< in: buffer pool instance */
+	ulint		n,		/*!< in: nth chunk in the buffer pool */
+	ulint*		chunk_size);	/*!< chunk size; presumably out: TODO confirm */
+
+/********************************************************************//**
+Calculate the checksum of a page from a compressed table and update the page. */
+UNIV_INTERN
+void
+buf_flush_update_zip_checksum(
+/*==========================*/
+	buf_frame_t*	page,		/*!< in/out: Page to update */
+	ulint		zip_size,	/*!< in: Compressed page size */
+	lsn_t		lsn);		/*!< in: Lsn to stamp on the page */
+
+#endif /* !UNIV_HOTBACKUP */
+
+/** The common buffer control block structure
+for compressed and uncompressed frames (struct buf_page_t below) */
+
+/** Number of bits used for buffer page states. */
+#define BUF_PAGE_STATE_BITS	3
+
+struct buf_page_t{
+	/** @name General fields
+	None of these bit-fields must be modified without holding
+	buf_page_get_mutex() [buf_block_t::mutex or
+	buf_pool->zip_mutex], since they can be stored in the same
+	machine word.  Some of these fields are additionally protected
+	by buf_pool->mutex. */
+	/* @{ */
+
+	ib_uint32_t	space;		/*!< tablespace id; also protected
+					by buf_pool->mutex. */
+	ib_uint32_t	offset;		/*!< page number; also protected
+					by buf_pool->mutex. */
+	/** count of how many times this block is currently buffer-fixed */
+#ifdef PAGE_ATOMIC_REF_COUNT
+	ib_uint32_t	buf_fix_count;
+
+	/** type of pending I/O operation; also protected by
+	buf_pool->mutex for writes only @see enum buf_io_fix */
+	byte		io_fix;
+
+	byte		state;		/*!< @see enum buf_page_state */
+#else
+	unsigned	buf_fix_count:19;
+
+	/** type of pending I/O operation; also protected by
+	buf_pool->mutex for writes only @see enum buf_io_fix */
+	unsigned	io_fix:2;
+
+	/** state of the control block; also protected by buf_pool->mutex.
+	State transitions from BUF_BLOCK_READY_FOR_USE to BUF_BLOCK_MEMORY
+	need not be protected by buf_page_get_mutex(). @see enum buf_page_state.
+	State changes that are relevant to page_hash are additionally protected
+	by the appropriate page_hash mutex i.e.: if a page is in page_hash or
+	is being added to/removed from page_hash then the corresponding changes
+	must also be protected by page_hash mutex. */
+	unsigned	state:BUF_PAGE_STATE_BITS;
+
+#endif /* PAGE_ATOMIC_REF_COUNT */
+
+#ifndef UNIV_HOTBACKUP
+	unsigned	flush_type:2;	/*!< if this block is currently being
+					flushed to disk, this tells the
+					flush_type.
+					@see buf_flush_t */
+	unsigned	buf_pool_index:6;/*!< index number of the buffer pool
+					that this block belongs to */
+# if MAX_BUFFER_POOLS > 64
+#  error "MAX_BUFFER_POOLS > 64; redefine buf_pool_index:6"
+# endif
+	/* @} */
+#endif /* !UNIV_HOTBACKUP */
+	page_zip_des_t	zip;		/*!< compressed page; zip.data
+					(but not the data it points to) is
+					also protected by buf_pool->mutex;
+					state == BUF_BLOCK_ZIP_PAGE and
+					zip.data == NULL means an active
+					buf_pool->watch */
+#ifndef UNIV_HOTBACKUP
+	buf_page_t*	hash;		/*!< node used in chaining to
+					buf_pool->page_hash or
+					buf_pool->zip_hash */
+#ifdef UNIV_DEBUG
+	ibool		in_page_hash;	/*!< TRUE if in buf_pool->page_hash */
+	ibool		in_zip_hash;	/*!< TRUE if in buf_pool->zip_hash */
+#endif /* UNIV_DEBUG */
+
+	/** @name Page flushing fields
+	All these are protected by buf_pool->mutex. */
+	/* @{ */
+
+	UT_LIST_NODE_T(buf_page_t) list;
+					/*!< based on state, this is a
+					list node, protected either by
+					buf_pool->mutex or by
+					buf_pool->flush_list_mutex,
+					in one of the following lists in
+					buf_pool:
+
+					- BUF_BLOCK_NOT_USED:	free
+					- BUF_BLOCK_FILE_PAGE:	flush_list
+					- BUF_BLOCK_ZIP_DIRTY:	flush_list
+					- BUF_BLOCK_ZIP_PAGE:	zip_clean
+
+					If bpage is part of flush_list
+					then the node pointers are
+					covered by buf_pool->flush_list_mutex.
+					Otherwise these pointers are
+					protected by buf_pool->mutex.
+
+					The contents of the list node
+					are undefined if !in_flush_list
+					&& state == BUF_BLOCK_FILE_PAGE,
+					or if state is one of
+					BUF_BLOCK_MEMORY,
+					BUF_BLOCK_REMOVE_HASH or
+					BUF_BLOCK_READY_FOR_USE. */
+
+#ifdef UNIV_DEBUG
+	ibool		in_flush_list;	/*!< TRUE if in buf_pool->flush_list;
+					when buf_pool->flush_list_mutex is
+					free, the following should hold:
+					in_flush_list
+					== (state == BUF_BLOCK_FILE_PAGE
+					    || state == BUF_BLOCK_ZIP_DIRTY)
+					Writes to this field must be
+					covered by both block->mutex
+					and buf_pool->flush_list_mutex. Hence
+					reads can happen while holding
+					any one of the two mutexes */
+	ibool		in_free_list;	/*!< TRUE if in buf_pool->free; when
+					buf_pool->mutex is free, the following
+					should hold: in_free_list
+					== (state == BUF_BLOCK_NOT_USED) */
+#endif /* UNIV_DEBUG */
+	lsn_t		newest_modification;
+					/*!< log sequence number of
+					the youngest modification to
+					this block, zero if not
+					modified. Protected by block
+					mutex */
+	lsn_t		oldest_modification;
+					/*!< log sequence number of
+					the START of the log entry
+					written of the oldest
+					modification to this block
+					which has not yet been flushed
+					on disk; zero if all
+					modifications are on disk.
+					Writes to this field must be
+					covered by both block->mutex
+					and buf_pool->flush_list_mutex. Hence
+					reads can happen while holding
+					any one of the two mutexes */
+	/* @} */
+	/** @name LRU replacement algorithm fields
+	These fields are protected by buf_pool->mutex only (not
+	buf_pool->zip_mutex or buf_block_t::mutex). */
+	/* @{ */
+
+	UT_LIST_NODE_T(buf_page_t) LRU;
+					/*!< node of the LRU list */
+#ifdef UNIV_DEBUG
+	ibool		in_LRU_list;	/*!< TRUE if the page is in
+					the LRU list; used in
+					debugging */
+#endif /* UNIV_DEBUG */
+	unsigned	old:1;		/*!< TRUE if the block is in the old
+					blocks in buf_pool->LRU_old */
+	unsigned	freed_page_clock:31;/*!< the value of
+					buf_pool->freed_page_clock
+					when this block was the last
+					time put to the head of the
+					LRU list; a thread is allowed
+					to read this for heuristic
+					purposes without holding any
+					mutex or latch */
+	/* @} */
+	unsigned	access_time;	/*!< time of first access, or
+					0 if the block was never accessed
+					in the buffer pool. Protected by
+					block mutex */
+# if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
+	ibool		file_page_was_freed;
+					/*!< this is set to TRUE when
+					fsp frees a page in buffer pool;
+					protected by buf_pool->zip_mutex
+					or buf_block_t::mutex. */
+# endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
+#endif /* !UNIV_HOTBACKUP */
+};
+
+/** The buffer control block structure (embeds a buf_page_t first) */
+
+struct buf_block_t{
+
+	/** @name General fields */
+	/* @{ */
+
+	buf_page_t	page;		/*!< page information; this must
+					be the first field, so that
+					buf_pool->page_hash can point
+					to buf_page_t or buf_block_t */
+	byte*		frame;		/*!< pointer to buffer frame which
+					is of size UNIV_PAGE_SIZE, and
+					aligned to an address divisible by
+					UNIV_PAGE_SIZE */
+#ifndef UNIV_HOTBACKUP
+	UT_LIST_NODE_T(buf_block_t) unzip_LRU;
+					/*!< node of the decompressed LRU list;
+					a block is in the unzip_LRU list
+					if page.state == BUF_BLOCK_FILE_PAGE
+					and page.zip.data != NULL */
+#ifdef UNIV_DEBUG
+	ibool		in_unzip_LRU_list;/*!< TRUE if the page is in the
+					decompressed LRU list;
+					used in debugging */
+#endif /* UNIV_DEBUG */
+	ib_mutex_t	mutex;		/*!< mutex protecting this block:
+					state (also protected by the buffer
+					pool mutex), io_fix, buf_fix_count,
+					and access_time; we introduce this new
+					mutex in InnoDB-5.1 to relieve
+					contention on the buffer pool mutex */
+	rw_lock_t	lock;		/*!< read-write lock of the buffer
+					frame */
+	unsigned	lock_hash_val:32;/*!< hashed value of the page address
+					in the record lock hash table;
+					protected by buf_block_t::lock
+					(or buf_block_t::mutex, buf_pool->mutex
+					in buf_page_get_gen(),
+					buf_page_init_for_read()
+					and buf_page_create()) */
+	ibool		check_index_page_at_flush;
+					/*!< TRUE if we know that this is
+					an index page, and want the database
+					to check its consistency before flush;
+					note that there may be pages in the
+					buffer pool which are index pages,
+					but this flag is not set because
+					we do not keep track of all pages;
+					NOT protected by any mutex */
+	/* @} */
+	/** @name Optimistic search field */
+	/* @{ */
+
+	ib_uint64_t	modify_clock;	/*!< this clock is incremented every
+					time a pointer to a record on the
+					page may become obsolete; this is
+					used in the optimistic cursor
+					positioning: if the modify clock has
+					not changed, we know that the pointer
+					is still valid; this field may be
+					changed if the thread (1) owns the
+					pool mutex and the page is not
+					bufferfixed, or (2) the thread has an
+					x-latch on the block */
+	/* @} */
+	/** @name Hash search fields (unprotected)
+	NOTE that these fields are NOT protected by any semaphore! */
+	/* @{ */
+
+	ulint		n_hash_helps;	/*!< counter which controls building
+					of a new hash index for the page */
+	ulint		n_fields;	/*!< recommended prefix length for hash
+					search: number of full fields */
+	ulint		n_bytes;	/*!< recommended prefix: number of bytes
+					in an incomplete field */
+	ibool		left_side;	/*!< TRUE or FALSE, depending on
+					whether the leftmost record of several
+					records with the same prefix should be
+					indexed in the hash index */
+	/* @} */
+
+	/** @name Hash search fields
+	These 5 fields may only be modified when we have
+	an x-latch on btr_search_latch AND
+	- we are holding an s-latch or x-latch on buf_block_t::lock or
+	- we know that buf_block_t::buf_fix_count == 0.
+
+	An exception to this is when we init or create a page
+	in the buffer pool in buf0buf.cc.
+
+	Another exception is that assigning block->index = NULL
+	is allowed whenever holding an x-latch on btr_search_latch. */
+
+	/* @{ */
+
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+	ulint		n_pointers;	/*!< used in debugging: the number of
+					pointers in the adaptive hash index
+					pointing to this frame */
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+	unsigned	curr_n_fields:10;/*!< prefix length for hash indexing:
+					number of full fields */
+	unsigned	curr_n_bytes:15;/*!< number of bytes in hash
+					indexing */
+	unsigned	curr_left_side:1;/*!< TRUE or FALSE in hash indexing */
+	dict_index_t*	index;		/*!< Index for which the
+					adaptive hash index has been
+					created, or NULL if the page
+					does not exist in the
+					index. Note that it does not
+					guarantee that the index is
+					complete, though: there may
+					have been hash collisions,
+					record deletions, etc. */
+	/* @} */
+# ifdef UNIV_SYNC_DEBUG
+	/** @name Debug fields */
+	/* @{ */
+	rw_lock_t	debug_latch;	/*!< in the debug version, each thread
+					which bufferfixes the block acquires
+					an s-latch here; so we can use the
+					debug utilities in sync0rw */
+	/* @} */
+# endif
+#endif /* !UNIV_HOTBACKUP */
+};
+
+/** Check if a buf_block_t object is in a valid state
+@param block	buffer block (evaluated twice: pass no side effects)
+@return		TRUE if valid */
+#define buf_block_state_valid(block)				\
+(buf_block_get_state(block) >= BUF_BLOCK_NOT_USED		\
+ && (buf_block_get_state(block) <= BUF_BLOCK_REMOVE_HASH))
+
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Fold value for buf_pool->zip_hash: the frame address / UNIV_PAGE_SIZE. */
+/* @{ */
+#define BUF_POOL_ZIP_FOLD_PTR(ptr) ((ulint) (ptr) / UNIV_PAGE_SIZE)
+#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame)
+#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
+/* @} */
+
+/** Struct that is embedded in the free zip blocks */
+struct buf_buddy_free_t {
+	union {
+		ulint	size;	/*!< size of the block */
+		byte	bytes[FIL_PAGE_DATA];
+				/*!< bytes[FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID]
+				== BUF_BUDDY_FREE_STAMP denotes a free
+				block. If the space_id field of buddy
+				block != BUF_BUDDY_FREE_STAMP, the block
+				is not in any zip_free list. If the
+				space_id is BUF_BUDDY_FREE_STAMP then
+				the start of the stamp (presumably the
+				size member) holds the buddy block size. */
+	} stamp;
+
+	buf_page_t	bpage;	/*!< Embedded bpage descriptor */
+	UT_LIST_NODE_T(buf_buddy_free_t) list;
+				/*!< Node of zip_free list */
+};
+
+/** @brief The buffer pool statistics structure. */
+struct buf_pool_stat_t{
+	ulint	n_page_gets;	/*!< number of page gets performed;
+				also successful searches through
+				the adaptive hash index are
+				counted as page gets; this field
+				is NOT protected by the buffer
+				pool mutex */
+	ulint	n_pages_read;	/*!< number of read operations */
+	ulint	n_pages_written;/*!< number of write operations */
+	ulint	n_pages_created;/*!< number of pages created
+				in the pool with no read */
+	ulint	n_ra_pages_read_rnd;/*!< number of pages read in
+				as part of random read ahead */
+	ulint	n_ra_pages_read;/*!< number of pages read in
+				as part of read ahead */
+	ulint	n_ra_pages_evicted;/*!< number of read ahead
+				pages that are evicted without
+				being accessed */
+	ulint	n_pages_made_young; /*!< number of pages made young, in
+				calls to buf_LRU_make_block_young() */
+	ulint	n_pages_not_made_young; /*!< number of pages not made
+				young because the first access
+				was not long enough ago, in
+				buf_page_peek_if_too_old() */
+	ulint	LRU_bytes;	/*!< LRU size in bytes */
+	ulint	flush_list_bytes;/*!< flush_list size in bytes */
+};
+
+/** Statistics of buddy blocks of a given size. */
+struct buf_buddy_stat_t {
+	/** Number of blocks of this size allocated from the buddy system. */
+	ulint		used;
+	/** Number of blocks of this size relocated by the buddy system. */
+	ib_uint64_t	relocated;
+	/** Total duration of block relocations, in microseconds. */
+	ib_uint64_t	relocated_usec;
+};
+
+/** @brief The buffer pool structure.
+
+NOTE! The definition appears here only for other modules of this
+directory (buf) to see it. Do not use from outside! */
+
+struct buf_pool_t{
+
+	/** @name General fields */
+	/* @{ */
+	ib_mutex_t	mutex;		/*!< Buffer pool mutex of this
+					instance */
+	ib_mutex_t	zip_mutex;	/*!< Zip mutex of this buffer
+					pool instance, protects compressed
+					only pages (of type buf_page_t, not
+					buf_block_t) */
+	ulint		instance_no;	/*!< Array index of this buffer
+					pool instance */
+	ulint		old_pool_size;  /*!< Old pool size in bytes */
+	ulint		curr_pool_size;	/*!< Current pool size in bytes */
+	ulint		LRU_old_ratio;  /*!< Reserve this much of the buffer
+					pool for "old" blocks */
+#ifdef UNIV_DEBUG
+	ulint		buddy_n_frames; /*!< Number of frames allocated from
+					the buffer pool to the buddy system */
+#endif
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+	ulint		mutex_exit_forbidden; /*!< Forbid release mutex */
+#endif
+	ulint		n_chunks;	/*!< number of buffer pool chunks */
+	buf_chunk_t*	chunks;		/*!< buffer pool chunks */
+	ulint		curr_size;	/*!< current pool size in pages */
+	hash_table_t*	page_hash;	/*!< hash table of buf_page_t or
+					buf_block_t file pages,
+					buf_page_in_file() == TRUE,
+					indexed by (space_id, offset).
+					page_hash is protected by an
+					array of mutexes.
+					Changes in page_hash are protected
+					by buf_pool->mutex and the relevant
+					page_hash mutex. Lookups can happen
+					while holding the buf_pool->mutex or
+					the relevant page_hash mutex. */
+	hash_table_t*	zip_hash;	/*!< hash table of buf_block_t blocks
+					whose frames are allocated to the
+					zip buddy system,
+					indexed by block->frame */
+	ulint		n_pend_reads;	/*!< number of pending read
+					operations */
+	ulint		n_pend_unzip;	/*!< number of pending decompressions */
+
+	time_t		last_printout_time;
+					/*!< when buf_print_io was last
+					called */
+	buf_buddy_stat_t buddy_stat[BUF_BUDDY_SIZES_MAX + 1];
+					/*!< Statistics of buddy system,
+					indexed by block size */
+	buf_pool_stat_t	stat;		/*!< current statistics */
+	buf_pool_stat_t	old_stat;	/*!< old statistics */
+
+	/* @} */
+
+	/** @name Page flushing algorithm fields */
+
+	/* @{ */
+
+	ib_mutex_t	flush_list_mutex;/*!< mutex protecting the
+					flush list access. This mutex
+					protects flush_list, flush_rbt
+					and bpage::list pointers when
+					the bpage is on flush_list. It
+					also protects writes to
+					bpage::oldest_modification and
+					flush_list_hp */
+	const buf_page_t*	flush_list_hp;/*!< "hazard pointer"
+					used during scan of flush_list
+					while doing flush list batch.
+					Protected by flush_list_mutex */
+	UT_LIST_BASE_NODE_T(buf_page_t) flush_list;
+					/*!< base node of the modified block
+					list */
+	ibool		init_flush[BUF_FLUSH_N_TYPES];
+					/*!< this is TRUE when a flush of the
+					given type is being initialized */
+	ulint		n_flush[BUF_FLUSH_N_TYPES];
+					/*!< this is the number of pending
+					writes in the given flush type */
+	os_event_t	no_flush[BUF_FLUSH_N_TYPES];
+					/*!< this is in the set state
+					when there is no flush batch
+					of the given type running */
+	ib_rbt_t*	flush_rbt;	/*!< a red-black tree is used
+					exclusively during recovery to
+					speed up insertions in the
+					flush_list. This tree contains
+					blocks in order of
+					oldest_modification LSN and is
+					kept in sync with the
+					flush_list.
+					Each member of the tree MUST
+					also be on the flush_list.
+					This tree is relevant only in
+					recovery and is set to NULL
+					once the recovery is over.
+					Protected by flush_list_mutex */
+	ulint		freed_page_clock;/*!< a sequence number used
+					to count the number of buffer
+					blocks removed from the end of
+					the LRU list; NOTE that this
+					counter may wrap around at 4
+					billion! A thread is allowed
+					to read this for heuristic
+					purposes without holding any
+					mutex or latch */
+	ibool		try_LRU_scan;	/*!< Set to FALSE when an LRU
+					scan for free block fails. This
+					flag is used to avoid repeated
+					scans of LRU list when we know
+					that there is no free block
+					available in the scan depth for
+					eviction. Set to TRUE whenever
+					we flush a batch from the
+					buffer pool. Protected by the
+					buf_pool->mutex */
+	/* @} */
+
+	/** @name LRU replacement algorithm fields */
+	/* @{ */
+
+	UT_LIST_BASE_NODE_T(buf_page_t) free;
+					/*!< base node of the free
+					block list */
+	UT_LIST_BASE_NODE_T(buf_page_t) LRU;
+					/*!< base node of the LRU list */
+	buf_page_t*	LRU_old;	/*!< pointer to the about
+					LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
+					oldest blocks in the LRU list;
+					NULL if LRU length less than
+					BUF_LRU_OLD_MIN_LEN;
+					NOTE: when LRU_old != NULL, its length
+					should always equal LRU_old_len */
+	ulint		LRU_old_len;	/*!< length of the LRU list from
+					the block to which LRU_old points
+					onward, including that block;
+					see buf0lru.cc for the restrictions
+					on this value; 0 if LRU_old == NULL;
+					NOTE: LRU_old_len must be adjusted
+					whenever LRU_old shrinks or grows! */
+
+	UT_LIST_BASE_NODE_T(buf_block_t) unzip_LRU;
+					/*!< base node of the
+					unzip_LRU list */
+
+	/* @} */
+	/** @name Buddy allocator fields
+	The buddy allocator is used for allocating compressed page
+	frames and buf_page_t descriptors of blocks that exist
+	in the buffer pool only in compressed form. */
+	/* @{ */
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+	UT_LIST_BASE_NODE_T(buf_page_t)	zip_clean;
+					/*!< unmodified compressed pages */
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+	UT_LIST_BASE_NODE_T(buf_buddy_free_t) zip_free[BUF_BUDDY_SIZES_MAX];
+					/*!< buddy free lists */
+
+	buf_page_t*			watch;
+					/*!< Sentinel records for buffer
+					pool watches. Protected by
+					buf_pool->mutex. */
+
+#if BUF_BUDDY_LOW > UNIV_ZIP_SIZE_MIN
+# error "BUF_BUDDY_LOW > UNIV_ZIP_SIZE_MIN"
+#endif
+	/* @} */
+};
+
+/** @name Accessors for buf_pool->mutex.
+Use these instead of accessing buf_pool->mutex directly. */
+/* @{ */
+
+/** Test if a buffer pool mutex is owned. @param b buffer pool instance */
+#define buf_pool_mutex_own(b) mutex_own(&(b)->mutex)
+/** Acquire a buffer pool mutex; ut_ad-checks that zip_mutex is not owned. */
+#define buf_pool_mutex_enter(b) do {			\
+	ut_ad(!mutex_own(&(b)->zip_mutex));		\
+	mutex_enter(&(b)->mutex);			\
+} while (0)
+
+/** Test if flush list mutex is owned. @param b buffer pool instance */
+#define buf_flush_list_mutex_own(b) mutex_own(&(b)->flush_list_mutex)
+
+/** Acquire the flush list mutex. @param b buffer pool instance */
+#define buf_flush_list_mutex_enter(b) do {		\
+	mutex_enter(&(b)->flush_list_mutex);		\
+} while (0)
+/** Release the flush list mutex. @param b buffer pool instance */
+# define buf_flush_list_mutex_exit(b) do {		\
+	mutex_exit(&(b)->flush_list_mutex);		\
+} while (0)
+
+/** Test if block->mutex is owned. @param b buffer block */
+#define buf_block_mutex_own(b)	mutex_own(&(b)->mutex)
+
+/** Acquire the block->mutex. @param b buffer block */
+#define buf_block_mutex_enter(b) do {			\
+	mutex_enter(&(b)->mutex);			\
+} while (0)
+
+/** Release the block->mutex. @param b buffer block */
+#define buf_block_mutex_exit(b) do {			\
+	mutex_exit(&(b)->mutex);				\
+} while (0)
+
+
+/** Get the page_hash lock for fold f. @param b buffer pool instance */
+# define buf_page_hash_lock_get(b, f)			\
+	hash_get_lock((b)->page_hash, f)
+
+#ifdef UNIV_SYNC_DEBUG
+/** Test if page_hash lock is held in s-mode (b: pool, p: buf_page_t*). */
+# define buf_page_hash_lock_held_s(b, p)		\
+	rw_lock_own(buf_page_hash_lock_get((b),		\
+		  buf_page_address_fold((p)->space,	\
+					(p)->offset)),	\
+					RW_LOCK_SHARED)
+
+/** Test if page_hash lock is held in x-mode (b: pool, p: buf_page_t*). */
+# define buf_page_hash_lock_held_x(b, p)		\
+	rw_lock_own(buf_page_hash_lock_get((b),		\
+		  buf_page_address_fold((p)->space,	\
+					(p)->offset)),	\
+					RW_LOCK_EX)
+
+/** Test if page_hash lock is held in x or s-mode. */
+# define buf_page_hash_lock_held_s_or_x(b, p)		\
+	(buf_page_hash_lock_held_s((b), (p))		\
+	 || buf_page_hash_lock_held_x((b), (p)))
+/** The same tests for a buf_block_t*: applied to its embedded page. */
+# define buf_block_hash_lock_held_s(b, p)		\
+	buf_page_hash_lock_held_s((b), &(p)->page)
+
+# define buf_block_hash_lock_held_x(b, p)		\
+	buf_page_hash_lock_held_x((b), &(p)->page)
+
+# define buf_block_hash_lock_held_s_or_x(b, p)		\
+	buf_page_hash_lock_held_s_or_x((b), &(p)->page)
+#else /* UNIV_SYNC_DEBUG */
+# define buf_page_hash_lock_held_s(b, p)	(TRUE)
+# define buf_page_hash_lock_held_x(b, p)	(TRUE)
+# define buf_page_hash_lock_held_s_or_x(b, p)	(TRUE)
+# define buf_block_hash_lock_held_s(b, p)	(TRUE)
+# define buf_block_hash_lock_held_x(b, p)	(TRUE)
+# define buf_block_hash_lock_held_s_or_x(b, p)	(TRUE)
+#endif /* UNIV_SYNC_DEBUG */
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/** Forbid the release of the buffer pool mutex (bumps a nesting counter). */
+# define buf_pool_mutex_exit_forbid(b) do {	\
+	ut_ad(buf_pool_mutex_own((b)));		\
+	(b)->mutex_exit_forbidden++;		\
+} while (0)
+/** Allow the release of the buffer pool mutex (undoes one forbid). */
+# define buf_pool_mutex_exit_allow(b) do {	\
+	ut_ad(buf_pool_mutex_own((b)));		\
+	ut_a((b)->mutex_exit_forbidden);	\
+	(b)->mutex_exit_forbidden--;		\
+} while (0)
+/** Release the buffer pool mutex; asserts that release is not forbidden. */
+# define buf_pool_mutex_exit(b) do {		\
+	ut_a(!(b)->mutex_exit_forbidden);	\
+	mutex_exit(&(b)->mutex);		\
+} while (0)
+#else
+/** Forbid the release of the buffer pool mutex. */
+# define buf_pool_mutex_exit_forbid(b) ((void) 0)
+/** Allow the release of the buffer pool mutex. */
+# define buf_pool_mutex_exit_allow(b) ((void) 0)
+/** Release the buffer pool mutex. */
+# define buf_pool_mutex_exit(b) mutex_exit(&(b)->mutex)
+#endif
+#endif /* !UNIV_HOTBACKUP */
+/* @} */
+
+/**********************************************************************
+Let us list the consistency conditions for different control block states.
+
+NOT_USED:	is in free list, not in LRU list, not in flush list, nor
+		page hash table
+READY_FOR_USE:	is not in free list, LRU list, or flush list, nor page
+		hash table
+MEMORY:		is not in free list, LRU list, or flush list, nor page
+		hash table
+FILE_PAGE:	space and offset are defined, is in page hash table
+		if io_fix == BUF_IO_WRITE,
+			pool: no_flush[flush_type] is in reset state,
+			pool: n_flush[flush_type] > 0
+
+		(1) if buf_fix_count == 0, then
+			is in LRU list, not in free list
+			is in flush list,
+				if and only if oldest_modification > 0
+			is x-locked,
+				if and only if io_fix == BUF_IO_READ
+			is s-locked,
+				if and only if io_fix == BUF_IO_WRITE
+
+		(2) if buf_fix_count > 0, then
+			is not in LRU list, not in free list
+			is in flush list,
+				if and only if oldest_modification > 0
+			if io_fix == BUF_IO_READ,
+				is x-locked
+			if io_fix == BUF_IO_WRITE,
+				is s-locked
+
+State transitions:
+
+NOT_USED => READY_FOR_USE
+READY_FOR_USE => MEMORY
+READY_FOR_USE => FILE_PAGE
+MEMORY => NOT_USED
+FILE_PAGE => NOT_USED	NOTE: This transition is allowed if and only if
+				(1) buf_fix_count == 0,
+				(2) oldest_modification == 0, and
+				(3) io_fix == 0.
+*/
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/** Functor to validate the LRU list: asserts in_LRU_list on each element. */
+struct	CheckInLRUList {
+	void	operator()(const buf_page_t* elem) const
+	{
+		ut_a(elem->in_LRU_list);
+	}
+};
+
+/** Functor to validate the free list: asserts in_free_list on each element. */
+struct	CheckInFreeList {
+	void	operator()(const buf_page_t* elem) const
+	{
+		ut_a(elem->in_free_list);
+	}
+};
+
+struct	CheckUnzipLRUAndLRUList {
+	void	operator()(const buf_block_t* elem) const
+	{	/* asserts that the block is flagged in both lists */
+                ut_a(elem->page.in_LRU_list);
+                ut_a(elem->in_unzip_LRU_list);
+	}
+};
+#endif /* UNIV_DEBUG || defined UNIV_BUF_DEBUG */
+
+#ifndef UNIV_NONINL
+#include "buf0buf.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
new file mode 100644
index 00000000000..56616c6deeb
--- /dev/null
+++ b/storage/innobase/include/buf0buf.ic
@@ -0,0 +1,1460 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0buf.ic
+The database buffer buf_pool
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#include "mtr0mtr.h"
+#ifndef UNIV_HOTBACKUP
+#include "buf0flu.h"
+#include "buf0lru.h"
+#include "buf0rea.h"
+
+/** One chunk of buffers. The buffer pool is allocated chunk by chunk. */
+struct buf_chunk_t{
+	ulint		mem_size;	/*!< allocated size of the chunk */
+	ulint		size;		/*!< size of frames[] and blocks[] */
+	void*		mem;		/*!< pointer to the memory area that
+					was allocated for the frames */
+	buf_block_t*	blocks;		/*!< array of buffer control blocks */
+};
+
+/*********************************************************************//**
+Gets the current size of the buffer pool, as held in srv_buf_pool_curr_size.
+@return size in bytes */
+UNIV_INLINE
+ulint
+buf_pool_get_curr_size(void)
+/*========================*/
+{
+	return(srv_buf_pool_curr_size);
+}
+
+/********************************************************************//**
+Computes the position of a buffer pool instance within buf_pool_ptr[].
+@return	the array index that corresponds to buf_pool */
+UNIV_INLINE
+ulint
+buf_pool_index(
+/*===========*/
+	const buf_pool_t*	buf_pool)	/*!< in: buffer pool */
+{
+	const ulint	pos = buf_pool - buf_pool_ptr;
+	ut_ad(pos < MAX_BUFFER_POOLS);
+	ut_ad(pos < srv_buf_pool_instances);
+	return(pos);
+}
+
+/******************************************************************//**
+Looks up the buffer pool instance recorded in a page descriptor.
+@return buf_pool_ptr[] entry selected by bpage->buf_pool_index */
+UNIV_INLINE
+buf_pool_t*
+buf_pool_from_bpage(
+/*================*/
+	const buf_page_t*	bpage) /*!< in: buffer pool page */
+{
+	const ulint	pool_no = bpage->buf_pool_index;
+
+	ut_ad(pool_no < srv_buf_pool_instances);
+	return(&buf_pool_ptr[pool_no]);
+}
+
+/******************************************************************//**
+Returns the buffer pool instance of a block, via buf_pool_from_bpage().
+@return buffer pool instance of block->page */
+UNIV_INLINE
+buf_pool_t*
+buf_pool_from_block(
+/*================*/
+	const buf_block_t*	block) /*!< in: block */
+{
+	return(buf_pool_from_bpage(&block->page));
+}
+
+/*********************************************************************//**
+Gets the current size of the buffer pool in pages of UNIV_PAGE_SIZE bytes.
+@return buf_pool_get_curr_size() / UNIV_PAGE_SIZE */
+UNIV_INLINE
+ulint
+buf_pool_get_n_pages(void)
+/*======================*/
+{
+	return(buf_pool_get_curr_size() / UNIV_PAGE_SIZE);
+}
+
+/********************************************************************//**
+Reads the freed_page_clock stored in a page descriptor.
+@return	bpage->freed_page_clock */
+UNIV_INLINE
+ulint
+buf_page_get_freed_page_clock(
+/*==========================*/
+	const buf_page_t*	bpage)	/*!< in: page descriptor */
+{
+	/* This is sometimes read without holding buf_pool->mutex. */
+	return(bpage->freed_page_clock);
+}
+
+/********************************************************************//**
+Reads the freed_page_clock of a buffer block (that of block->page).
+@return	freed_page_clock */
+UNIV_INLINE
+ulint
+buf_block_get_freed_page_clock(
+/*===========================*/
+	const buf_block_t*	block)	/*!< in: block */
+{
+	return(buf_page_get_freed_page_clock(&block->page));
+}
+
+/********************************************************************//**
+Tells if a block is still close enough to the MRU end of the LRU list
+meaning that it is not in danger of getting evicted and also implying
+that it has been accessed recently. The pool's freed_page_clock is compared
+with the block's own clock plus a margin derived from curr_size and
+LRU_old_ratio. Heuristic only: the buffer pool mutex is not reserved.
+@return	TRUE if block is close to MRU end of LRU */
+UNIV_INLINE
+ibool
+buf_page_peek_if_young(
+/*===================*/
+	const buf_page_t*	bpage)	/*!< in: block */
+{
+	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
+
+	/* FIXME: bpage->freed_page_clock is 31 bits; mask the pool clock */
+	return((buf_pool->freed_page_clock & ((1UL << 31) - 1))
+	       < ((ulint) bpage->freed_page_clock
+		  + (buf_pool->curr_size
+		     * (BUF_LRU_OLD_RATIO_DIV - buf_pool->LRU_old_ratio)
+		     / (BUF_LRU_OLD_RATIO_DIV * 4))));
+}
+
+/********************************************************************//**
+Recommends moving a block to the start of the LRU list when it is in danger
+of being dropped from the buffer pool. NOTE: the buffer pool mutex is not
+reserved.
+@return	TRUE if should be made younger */
+UNIV_INLINE
+ibool
+buf_page_peek_if_too_old(
+/*=====================*/
+	const buf_page_t*	bpage)	/*!< in: block to make younger */
+{
+	buf_pool_t*	pool = buf_pool_from_bpage(bpage);
+
+	if (pool->freed_page_clock == 0) {
+		/* Eviction has not started (warm-up or in-memory workload):
+		leave the statistics and the LRU list alone. */
+		return(FALSE);
+	}
+
+	if (!buf_LRU_old_threshold_ms || !bpage->old) {
+		return(!buf_page_peek_if_young(bpage));
+	}
+
+	const unsigned	first_access = buf_page_is_accessed(bpage);
+	if (first_access > 0
+	    && ((ib_uint32_t) (ut_time_ms() - first_access))
+	    >= buf_LRU_old_threshold_ms) {
+		return(TRUE);
+	}
+
+	pool->stat.n_pages_not_made_young++;
+	return(FALSE);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/*********************************************************************//**
+Gets the state of a block; UNIV_DEBUG builds fail on an unknown value.
+@return	state */
+UNIV_INLINE
+enum buf_page_state
+buf_page_get_state(
+/*===============*/
+	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
+{
+	enum buf_page_state	state = (enum buf_page_state) bpage->state;
+
+#ifdef UNIV_DEBUG
+	switch (state) {
+	case BUF_BLOCK_POOL_WATCH:
+	case BUF_BLOCK_ZIP_PAGE:
+	case BUF_BLOCK_ZIP_DIRTY:
+	case BUF_BLOCK_NOT_USED:
+	case BUF_BLOCK_READY_FOR_USE:
+	case BUF_BLOCK_FILE_PAGE:
+	case BUF_BLOCK_MEMORY:
+	case BUF_BLOCK_REMOVE_HASH:
+		break;
+	default:
+		ut_error;	/* value outside the listed states */
+	}
+#endif /* UNIV_DEBUG */
+
+	return(state);
+}
+/*********************************************************************//**
+Gets the state of a block, i.e. the state of block->page.
+@return	state */
+UNIV_INLINE
+enum buf_page_state
+buf_block_get_state(
+/*================*/
+	const buf_block_t*	block)	/*!< in: pointer to the control block */
+{
+	return(buf_page_get_state(&block->page));
+}
+/*********************************************************************//**
+Sets the state of a block; UNIV_DEBUG builds check the transition. */
+UNIV_INLINE
+void
+buf_page_set_state(
+/*===============*/
+	buf_page_t*		bpage,	/*!< in/out: pointer to control block */
+	enum buf_page_state	state)	/*!< in: state */
+{
+#ifdef UNIV_DEBUG
+	enum buf_page_state	old_state	= buf_page_get_state(bpage);
+
+	switch (old_state) {
+	case BUF_BLOCK_POOL_WATCH:
+		ut_error;	/* no transition out of POOL_WATCH is accepted */
+		break;
+	case BUF_BLOCK_ZIP_PAGE:
+		ut_a(state == BUF_BLOCK_ZIP_DIRTY);
+		break;
+	case BUF_BLOCK_ZIP_DIRTY:
+		ut_a(state == BUF_BLOCK_ZIP_PAGE);
+		break;
+	case BUF_BLOCK_NOT_USED:
+		ut_a(state == BUF_BLOCK_READY_FOR_USE);
+		break;
+	case BUF_BLOCK_READY_FOR_USE:
+		ut_a(state == BUF_BLOCK_MEMORY
+		     || state == BUF_BLOCK_FILE_PAGE
+		     || state == BUF_BLOCK_NOT_USED);
+		break;
+	case BUF_BLOCK_MEMORY:
+		ut_a(state == BUF_BLOCK_NOT_USED);
+		break;
+	case BUF_BLOCK_FILE_PAGE:
+		ut_a(state == BUF_BLOCK_NOT_USED
+		     || state == BUF_BLOCK_REMOVE_HASH);
+		break;
+	case BUF_BLOCK_REMOVE_HASH:
+		ut_a(state == BUF_BLOCK_MEMORY);
+		break;
+	}
+#endif /* UNIV_DEBUG */
+	bpage->state = state;
+	ut_ad(buf_page_get_state(bpage) == state);
+}
+
+/*********************************************************************//**
+Sets the state of a block by delegating to buf_page_set_state(). */
+UNIV_INLINE
+void
+buf_block_set_state(
+/*================*/
+	buf_block_t*		block,	/*!< in/out: pointer to control block */
+	enum buf_page_state	state)	/*!< in: state */
+{
+	buf_page_set_state(&block->page, state);
+}
+
+/*********************************************************************//**
+Determines if a block is mapped to a tablespace.
+@return	TRUE in state ZIP_PAGE, ZIP_DIRTY or FILE_PAGE, else FALSE */
+UNIV_INLINE
+ibool
+buf_page_in_file(
+/*=============*/
+	const buf_page_t*	bpage)	/*!< in: pointer to control block */
+{
+	switch (buf_page_get_state(bpage)) {
+	case BUF_BLOCK_POOL_WATCH:
+		ut_error;	/* callers must not pass a POOL_WATCH page */
+		break;
+	case BUF_BLOCK_ZIP_PAGE:
+	case BUF_BLOCK_ZIP_DIRTY:
+	case BUF_BLOCK_FILE_PAGE:
+		return(TRUE);
+	case BUF_BLOCK_NOT_USED:
+	case BUF_BLOCK_READY_FOR_USE:
+	case BUF_BLOCK_MEMORY:
+	case BUF_BLOCK_REMOVE_HASH:
+		break;
+	}
+
+	return(FALSE);
+}
+
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+Determines if a block should be on unzip_LRU list.
+@return	TRUE if bpage has zip.data and is in state BUF_BLOCK_FILE_PAGE */
+UNIV_INLINE
+ibool
+buf_page_belongs_to_unzip_LRU(
+/*==========================*/
+	const buf_page_t*	bpage)	/*!< in: pointer to control block */
+{
+	ut_ad(buf_page_in_file(bpage));
+
+	return(bpage->zip.data
+	       && buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
+}
+
+/*********************************************************************//**
+Selects the mutex that protects a page descriptor, based on its state.
+@return	pointer to mutex protecting bpage */
+UNIV_INLINE
+ib_mutex_t*
+buf_page_get_mutex(
+/*===============*/
+	const buf_page_t*	bpage)	/*!< in: pointer to control block */
+{
+	switch (buf_page_get_state(bpage)) {
+	case BUF_BLOCK_POOL_WATCH:
+		ut_error;
+		return(NULL);
+	case BUF_BLOCK_ZIP_PAGE:
+	case BUF_BLOCK_ZIP_DIRTY:
+		/* These two states use the zip_mutex of the instance. */
+		return(&buf_pool_from_bpage(bpage)->zip_mutex);
+	default:
+		break;
+	}
+
+	return(&((buf_block_t*) bpage)->mutex);
+}
+
+/*********************************************************************//**
+Get the flush type of a page; UNIV_DEBUG builds reject unknown values.
+@return	flush type */
+UNIV_INLINE
+buf_flush_t
+buf_page_get_flush_type(
+/*====================*/
+	const buf_page_t*	bpage)	/*!< in: buffer page */
+{
+	buf_flush_t	flush_type = (buf_flush_t) bpage->flush_type;
+
+#ifdef UNIV_DEBUG
+	switch (flush_type) {
+	case BUF_FLUSH_LRU:
+	case BUF_FLUSH_LIST:
+	case BUF_FLUSH_SINGLE_PAGE:
+		return(flush_type);
+	case BUF_FLUSH_N_TYPES:
+		ut_error;
+	}
+	ut_error;	/* reached only for a value not listed above */
+#endif /* UNIV_DEBUG */
+	return(flush_type);
+}
+/*********************************************************************//**
+Set the flush type of a page; debug builds read it back to verify. */
+UNIV_INLINE
+void
+buf_page_set_flush_type(
+/*====================*/
+	buf_page_t*	bpage,		/*!< in/out: buffer page */
+	buf_flush_t	flush_type)	/*!< in: flush type */
+{
+	bpage->flush_type = flush_type;
+	ut_ad(buf_page_get_flush_type(bpage) == flush_type);
+}
+
+/*********************************************************************//**
+Map a block to a file page: set state FILE_PAGE, then space and offset. */
+UNIV_INLINE
+void
+buf_block_set_file_page(
+/*====================*/
+	buf_block_t*		block,	/*!< in/out: pointer to control block */
+	ulint			space,	/*!< in: tablespace id */
+	ulint			page_no)/*!< in: page number */
+{
+	buf_block_set_state(block, BUF_BLOCK_FILE_PAGE);
+	block->page.space = static_cast<ib_uint32_t>(space);
+	block->page.offset = static_cast<ib_uint32_t>(page_no);
+}
+
+/*********************************************************************//**
+Gets the io_fix state of a block; UNIV_DEBUG builds reject unknown values.
+@return	io_fix state */
+UNIV_INLINE
+enum buf_io_fix
+buf_page_get_io_fix(
+/*================*/
+	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
+{
+	ut_ad(bpage != NULL);
+
+	enum buf_io_fix	io_fix = (enum buf_io_fix) bpage->io_fix;
+#ifdef UNIV_DEBUG
+	switch (io_fix) {
+	case BUF_IO_NONE:
+	case BUF_IO_READ:
+	case BUF_IO_WRITE:
+	case BUF_IO_PIN:
+		return(io_fix);
+	}
+	ut_error;	/* reached only for a value not listed above */
+#endif /* UNIV_DEBUG */
+	return(io_fix);
+}
+
+/*********************************************************************//**
+Gets the io_fix state of a block, i.e. that of block->page.
+@return	io_fix state */
+UNIV_INLINE
+enum buf_io_fix
+buf_block_get_io_fix(
+/*=================*/
+	const buf_block_t*	block)	/*!< in: pointer to the control block */
+{
+	return(buf_page_get_io_fix(&block->page));
+}
+
+/*********************************************************************//**
+Sets the io_fix state of a block; debug builds expect both mutexes held. */
+UNIV_INLINE
+void
+buf_page_set_io_fix(
+/*================*/
+	buf_page_t*	bpage,	/*!< in/out: control block */
+	enum buf_io_fix	io_fix)	/*!< in: io_fix state */
+{
+#ifdef UNIV_DEBUG
+	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
+	ut_ad(buf_pool_mutex_own(buf_pool));
+#endif
+	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+
+	bpage->io_fix = io_fix;
+	ut_ad(buf_page_get_io_fix(bpage) == io_fix);
+}
+
+/*********************************************************************//**
+Sets the io_fix state of a block by delegating to buf_page_set_io_fix(). */
+UNIV_INLINE
+void
+buf_block_set_io_fix(
+/*=================*/
+	buf_block_t*	block,	/*!< in/out: control block */
+	enum buf_io_fix	io_fix)	/*!< in: io_fix state */
+{
+	buf_page_set_io_fix(&block->page, io_fix);
+}
+
+/*********************************************************************//**
+Makes a block sticky by setting io_fix to BUF_IO_PIN. A sticky block
+implies that even after we release the buf_pool->mutex and the block->mutex:
+* it cannot be removed from the flush_list
+* the block descriptor cannot be relocated
+* it cannot be removed from the LRU list
+Note that:
+* the block can still change its position in the LRU list
+* the next and previous pointers can change. */
+UNIV_INLINE
+void
+buf_page_set_sticky(
+/*================*/
+	buf_page_t*	bpage)	/*!< in/out: control block */
+{
+#ifdef UNIV_DEBUG
+	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
+	ut_ad(buf_pool_mutex_own(buf_pool));
+#endif
+	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+	ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
+
+	bpage->io_fix = BUF_IO_PIN;
+}
+
+/*********************************************************************//**
+Removes stickiness of a block: io_fix goes from BUF_IO_PIN to BUF_IO_NONE. */
+UNIV_INLINE
+void
+buf_page_unset_sticky(
+/*==================*/
+	buf_page_t*	bpage)	/*!< in/out: control block */
+{
+#ifdef UNIV_DEBUG
+	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
+	ut_ad(buf_pool_mutex_own(buf_pool));
+#endif
+	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+	ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_PIN);
+
+	bpage->io_fix = BUF_IO_NONE;
+}
+
+/********************************************************************//**
+Determine if a buffer block can be relocated in memory: it may be dirty, but
+must not be I/O-fixed or bufferfixed. @return TRUE if neither fix is held */
+UNIV_INLINE
+ibool
+buf_page_can_relocate(
+/*==================*/
+	const buf_page_t*	bpage)	/*!< in: control block being relocated */
+{
+#ifdef UNIV_DEBUG
+	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
+	ut_ad(buf_pool_mutex_own(buf_pool));
+#endif
+	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+	ut_ad(buf_page_in_file(bpage));
+	ut_ad(bpage->in_LRU_list);
+
+	return(buf_page_get_io_fix(bpage) == BUF_IO_NONE
+	       && bpage->buf_fix_count == 0);
+}
+
+/*********************************************************************//**
+Determine if a block has been flagged old (reads bpage->old).
+@return	TRUE if old */
+UNIV_INLINE
+ibool
+buf_page_is_old(
+/*============*/
+	const buf_page_t*	bpage)	/*!< in: control block */
+{
+#ifdef UNIV_DEBUG
+	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
+	ut_ad(buf_pool_mutex_own(buf_pool));
+#endif
+	ut_ad(buf_page_in_file(bpage));
+
+	return(bpage->old);
+}
+
+/*********************************************************************//**
+Sets or clears the "old" flag of a block that is in the LRU list. */
+UNIV_INLINE
+void
+buf_page_set_old(
+/*=============*/
+	buf_page_t*	bpage,	/*!< in/out: control block */
+	ibool		old)	/*!< in: new value of bpage->old */
+{
+#if defined UNIV_DEBUG || defined UNIV_LRU_DEBUG
+	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
+#endif /* UNIV_DEBUG || UNIV_LRU_DEBUG */
+	ut_a(buf_page_in_file(bpage));
+	ut_ad(buf_pool_mutex_own(buf_pool));
+	ut_ad(bpage->in_LRU_list);
+
+#ifdef UNIV_LRU_DEBUG
+	ut_a((buf_pool->LRU_old_len == 0) == (buf_pool->LRU_old == NULL));
+	/* If a block is flagged "old", the LRU_old list must exist. */
+	ut_a(!old || buf_pool->LRU_old);
+
+	if (UT_LIST_GET_PREV(LRU, bpage) && UT_LIST_GET_NEXT(LRU, bpage)) {
+		const buf_page_t*	prev = UT_LIST_GET_PREV(LRU, bpage);
+		const buf_page_t*	next = UT_LIST_GET_NEXT(LRU, bpage);
+		if (prev->old == next->old) {
+			ut_a(prev->old == old);
+		} else {
+			ut_a(!prev->old);
+			ut_a(buf_pool->LRU_old == (old ? bpage : next));
+		}
+	}
+#endif /* UNIV_LRU_DEBUG */
+
+	bpage->old = old;
+}
+
+/*********************************************************************//**
+Reads the recorded time of first access of a block in the buffer pool.
+@return	ut_time_ms() at the time of first access, 0 if not accessed */
+UNIV_INLINE
+unsigned
+buf_page_is_accessed(
+/*=================*/
+	const buf_page_t*	bpage)	/*!< in: control block */
+{
+	ut_ad(buf_page_in_file(bpage));
+
+	return(bpage->access_time);
+}
+
+/*********************************************************************//**
+Flag a block accessed: record ut_time_ms() if no access time is set yet. */
+UNIV_INLINE
+void
+buf_page_set_accessed(
+/*==================*/
+	buf_page_t*	bpage)		/*!< in/out: control block */
+{
+#ifdef UNIV_DEBUG
+	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
+	ut_ad(!buf_pool_mutex_own(buf_pool));
+	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+#endif /* UNIV_DEBUG */
+
+	ut_a(buf_page_in_file(bpage));
+
+	if (bpage->access_time == 0) {
+		/* Make this the time of the first access. */
+		bpage->access_time = static_cast<uint>(ut_time_ms());
+	}
+}
+
+/*********************************************************************//**
+Converts a page descriptor to its buf_block_t handle when the descriptor
+is in state BUF_BLOCK_FILE_PAGE; any other input yields NULL.
+@return	control block, or NULL */
+UNIV_INLINE
+buf_block_t*
+buf_page_get_block(
+/*===============*/
+	buf_page_t*	bpage)	/*!< in: control block, or NULL */
+{
+	if (bpage == NULL) {
+		return(NULL);
+	}
+
+	ut_ad(buf_page_in_file(bpage));
+
+	/* Only BUF_BLOCK_FILE_PAGE descriptors are cast to buf_block_t. */
+	return(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE
+	       ? (buf_block_t*) bpage : NULL);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Gets a pointer to the memory frame of a block (UNIV_DEBUG, state-checked).
+@return	pointer to the frame */
+UNIV_INLINE
+buf_frame_t*
+buf_block_get_frame(
+/*================*/
+	const buf_block_t*	block)	/*!< in: pointer to the control block */
+{
+	ut_ad(block);
+
+	switch (buf_block_get_state(block)) {
+	case BUF_BLOCK_POOL_WATCH:
+	case BUF_BLOCK_ZIP_PAGE:
+	case BUF_BLOCK_ZIP_DIRTY:
+	case BUF_BLOCK_NOT_USED:
+		ut_error;	/* the frame must not be requested in these states */
+		break;
+	case BUF_BLOCK_FILE_PAGE:
+# ifndef UNIV_HOTBACKUP
+		ut_a(block->page.buf_fix_count > 0);
+# endif /* !UNIV_HOTBACKUP */
+		/* fall through */
+	case BUF_BLOCK_READY_FOR_USE:
+	case BUF_BLOCK_MEMORY:
+	case BUF_BLOCK_REMOVE_HASH:
+		goto ok;
+	}
+	ut_error;
+ok:
+	return((buf_frame_t*) block->frame);
+}
+#endif /* UNIV_DEBUG */
+
+/*********************************************************************//**
+Gets the space id of a block; asserts that the page is mapped to a file.
+@return	space id */
+UNIV_INLINE
+ulint
+buf_page_get_space(
+/*===============*/
+	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
+{
+	ut_ad(bpage);
+	ut_a(buf_page_in_file(bpage));
+
+	return(bpage->space);
+}
+
+/*********************************************************************//**
+Gets the space id of a block; asserts state BUF_BLOCK_FILE_PAGE.
+@return	space id */
+UNIV_INLINE
+ulint
+buf_block_get_space(
+/*================*/
+	const buf_block_t*	block)	/*!< in: pointer to the control block */
+{
+	ut_ad(block);
+	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+
+	return(block->page.space);
+}
+
+/*********************************************************************//**
+Gets the page number of a block; asserts that the page is mapped to a file.
+@return	page number (bpage->offset) */
+UNIV_INLINE
+ulint
+buf_page_get_page_no(
+/*=================*/
+	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
+{
+	ut_ad(bpage);
+	ut_a(buf_page_in_file(bpage));
+
+	return(bpage->offset);
+}
+/*********************************************************************//**
+FIXME_FTS Gets the frame the pointer is pointing to, using
+ut_align_down(ptr, UNIV_PAGE_SIZE).
+@return	pointer to frame */
+UNIV_INLINE
+buf_frame_t*
+buf_frame_align(
+/*============*/
+	byte*	ptr)	/*!< in: pointer to a frame */
+{
+	ut_ad(ptr);
+
+	buf_frame_t*	aligned = (buf_frame_t*) ut_align_down(
+		ptr, UNIV_PAGE_SIZE);
+
+	return(aligned);
+}
+
+/*********************************************************************//**
+Gets the page number of a block; asserts state BUF_BLOCK_FILE_PAGE.
+@return	page number (block->page.offset) */
+UNIV_INLINE
+ulint
+buf_block_get_page_no(
+/*==================*/
+	const buf_block_t*	block)	/*!< in: pointer to the control block */
+{
+	ut_ad(block);
+	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+
+	return(block->page.offset);
+}
+
+/*********************************************************************//**
+Gets the compressed page size of a block, derived from zip.ssize.
+@return	(UNIV_ZIP_SIZE_MIN >> 1) << zip.ssize, or 0 if zip.ssize is 0 */
+UNIV_INLINE
+ulint
+buf_page_get_zip_size(
+/*==================*/
+	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
+{
+	return(bpage->zip.ssize
+	       ? (UNIV_ZIP_SIZE_MIN >> 1) << bpage->zip.ssize : 0);
+}
+
+/*********************************************************************//**
+Gets the compressed page size of a block, as computed by
+buf_page_get_zip_size() for the block's page descriptor.
+@return	compressed page size, or 0 */
+UNIV_INLINE
+ulint
+buf_block_get_zip_size(
+/*===================*/
+	const buf_block_t*	block)	/*!< in: pointer to the control block */
+{
+	return(buf_page_get_zip_size(&block->page));
+}
+
+#ifndef UNIV_HOTBACKUP
+#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
+/*********************************************************************//**
+Gets the compressed page descriptor corresponding to an uncompressed page
+if applicable; the block is located with buf_block_align(ptr).
+@return	compressed page descriptor, or NULL */
+UNIV_INLINE
+const page_zip_des_t*
+buf_frame_get_page_zip(
+/*===================*/
+	const byte*	ptr)	/*!< in: pointer to the page */
+{
+	return(buf_block_get_page_zip(buf_block_align(ptr)));
+}
+#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
+#endif /* !UNIV_HOTBACKUP */
+
+/**********************************************************************//**
+Gets the space id, page offset, and byte offset within page of a pointer
+into a buffer frame; space id and page number are read from the frame. */
+UNIV_INLINE
+void
+buf_ptr_get_fsp_addr(
+/*=================*/
+	const void*	ptr,	/*!< in: pointer to a buffer frame */
+	ulint*		space,	/*!< out: space id */
+	fil_addr_t*	addr)	/*!< out: page offset and byte offset */
+{
+	const page_t*	page = (const page_t*) ut_align_down(ptr,
+							     UNIV_PAGE_SIZE);
+
+	*space = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+	addr->page = mach_read_from_4(page + FIL_PAGE_OFFSET);
+	addr->boffset = ut_align_offset(ptr, UNIV_PAGE_SIZE);
+}
+
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Gets the hash value of the page the pointer is pointing to. This can be used
+in searches in the lock hash table.
+@return	lock hash value (block->lock_hash_val) */
+UNIV_INLINE
+ulint
+buf_block_get_lock_hash_val(
+/*========================*/
+	const buf_block_t*	block)	/*!< in: block */
+{
+	ut_ad(block);
+	ut_ad(buf_page_in_file(&block->page));
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(rw_lock_own(&(((buf_block_t*) block)->lock), RW_LOCK_EXCLUSIVE)
+	      || rw_lock_own(&(((buf_block_t*) block)->lock), RW_LOCK_SHARED));
+#endif /* UNIV_SYNC_DEBUG */
+	return(block->lock_hash_val);
+}
+
+/********************************************************************//**
+Allocates a buf_page_t descriptor. This function must succeed. In case
+of failure we assert in this function.
+@return	the allocated descriptor */
+UNIV_INLINE
+buf_page_t*
+buf_page_alloc_descriptor(void)
+/*===========================*/
+{
+	buf_page_t*	bpage;
+
+	bpage = (buf_page_t*) ut_malloc(sizeof *bpage);
+	ut_d(memset(bpage, 0, sizeof *bpage));
+	UNIV_MEM_ALLOC(bpage, sizeof *bpage);
+
+	return(bpage);
+}
+
+/********************************************************************//**
+Free a buf_page_t descriptor by passing it to ut_free(). */
+UNIV_INLINE
+void
+buf_page_free_descriptor(
+/*=====================*/
+	buf_page_t*	bpage)	/*!< in: bpage descriptor to free. */
+{
+	ut_free(bpage);
+}
+
+/********************************************************************//**
+Frees a buffer block which does not contain a file page. */
+UNIV_INLINE
+void
+buf_block_free(
+/*===========*/
+	buf_block_t*	block)	/*!< in, own: block to be freed */
+{
+	buf_pool_t*	buf_pool = buf_pool_from_bpage((buf_page_t*) block);
+
+	buf_pool_mutex_enter(buf_pool);
+
+	mutex_enter(&block->mutex);
+	/* Both mutexes are held from here until the block has been freed. */
+	ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
+
+	buf_LRU_block_free_non_file_page(block);
+
+	mutex_exit(&block->mutex);
+
+	buf_pool_mutex_exit(buf_pool);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/*********************************************************************//**
+Copies UNIV_PAGE_SIZE bytes of a buffer frame to a given buffer.
+@return	buf */
+UNIV_INLINE
+byte*
+buf_frame_copy(
+/*===========*/
+	byte*			buf,	/*!< out: buffer to copy to */
+	const buf_frame_t*	frame)	/*!< in: buffer frame */
+{
+	ut_ad(buf && frame);
+
+	ut_memcpy(buf, frame, UNIV_PAGE_SIZE);
+
+	return(buf);
+}
+
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+Calculates a folded value of a file page address to use in the page hash
+table.
+@return	the folded value, (space << 20) + space + offset */
+UNIV_INLINE
+ulint
+buf_page_address_fold(
+/*==================*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset)	/*!< in: offset of the page within space */
+{
+	return((space << 20) + space + offset);
+}
+
+/********************************************************************//**
+Gets the youngest modification log sequence number for a frame.
+Returns zero if not file page or no modification occurred yet.
+The value is read while holding the mutex of the page.
+@return	newest modification to page */
+UNIV_INLINE
+lsn_t
+buf_page_get_newest_modification(
+/*=============================*/
+	const buf_page_t*	bpage)	/*!< in: block containing the
+					page frame */
+{
+	ib_mutex_t*	page_mutex = buf_page_get_mutex(bpage);
+
+	mutex_enter(page_mutex);
+
+	/* The descriptor is inspected while holding its mutex.
+	Anything that is not a file page reports 0. */
+	const lsn_t	newest = buf_page_in_file(bpage)
+		? bpage->newest_modification
+		: 0;
+
+	mutex_exit(page_mutex);
+
+	return(newest);
+}
+
+/********************************************************************//**
+Increments the modify clock of a frame by 1. The caller must either (1) own
+the buf_pool mutex while the block bufferfix count is zero, or (2) own an
+x-lock on the block; UNIV_SYNC_DEBUG builds assert this. */
+UNIV_INLINE
+void
+buf_block_modify_clock_inc(
+/*=======================*/
+	buf_block_t*	block)	/*!< in/out: block */
+{
+#ifdef UNIV_SYNC_DEBUG
+	buf_pool_t*	buf_pool = buf_pool_from_bpage((buf_page_t*) block);
+
+	ut_ad((buf_pool_mutex_own(buf_pool)
+	       && (block->page.buf_fix_count == 0))
+	      || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
+#endif /* UNIV_SYNC_DEBUG */
+
+	block->modify_clock++;
+}
+
+/********************************************************************//**
+Returns the value of the modify clock. The caller must have an s-lock
+or x-lock on the block; UNIV_SYNC_DEBUG builds assert this.
+@return	block->modify_clock */
+UNIV_INLINE
+ib_uint64_t
+buf_block_get_modify_clock(
+/*=======================*/
+	buf_block_t*	block)	/*!< in: block */
+{
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
+	      || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
+#endif /* UNIV_SYNC_DEBUG */
+
+	return(block->modify_clock);
+}
+
+/*******************************************************************//**
+Increments the bufferfix count (atomically, or under the page mutex). */
+UNIV_INLINE
+void
+buf_block_fix(
+/*===========*/
+	buf_block_t*	block)	/*!< in/out: block to bufferfix */
+{
+#ifdef PAGE_ATOMIC_REF_COUNT
+	os_atomic_increment_uint32(&block->page.buf_fix_count, 1);
+#else
+	ib_mutex_t*	block_mutex = buf_page_get_mutex(&block->page);
+
+	mutex_enter(block_mutex);
+	++block->page.buf_fix_count;
+	mutex_exit(block_mutex);
+#endif /* PAGE_ATOMIC_REF_COUNT */
+}
+
+/*******************************************************************//**
+Increments the bufferfix count; without atomics, block->mutex must be held. */
+UNIV_INLINE
+void
+buf_block_buf_fix_inc_func(
+/*=======================*/
+#ifdef UNIV_SYNC_DEBUG
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line */
+#endif /* UNIV_SYNC_DEBUG */
+	buf_block_t*	block)	/*!< in/out: block to bufferfix */
+{
+#ifdef UNIV_SYNC_DEBUG
+	ibool	ret;
+
+	ret = rw_lock_s_lock_nowait(&(block->debug_latch), file, line);
+	ut_a(ret);
+#endif /* UNIV_SYNC_DEBUG */
+
+#ifdef PAGE_ATOMIC_REF_COUNT
+	os_atomic_increment_uint32(&block->page.buf_fix_count, 1);
+#else
+	ut_ad(mutex_own(&block->mutex));
+
+	++block->page.buf_fix_count;
+#endif /* PAGE_ATOMIC_REF_COUNT */
+}
+
+/*******************************************************************//**
+Decrements the bufferfix count (atomically, or under the page mutex). */
+UNIV_INLINE
+void
+buf_block_unfix(
+/*============*/
+	buf_block_t*	block)	/*!< in/out: block to bufferunfix */
+{
+	ut_ad(block->page.buf_fix_count > 0);
+
+#ifdef PAGE_ATOMIC_REF_COUNT
+	os_atomic_decrement_uint32(&block->page.buf_fix_count, 1);
+#else
+	ib_mutex_t*	block_mutex = buf_page_get_mutex(&block->page);
+
+	mutex_enter(block_mutex);
+	--block->page.buf_fix_count;
+	mutex_exit(block_mutex);
+#endif /* PAGE_ATOMIC_REF_COUNT */
+}
+
+/*******************************************************************//**
+Decrements the bufferfix count, taking block->mutex if not atomic. */
+UNIV_INLINE
+void
+buf_block_buf_fix_dec(
+/*==================*/
+	buf_block_t*	block)	/*!< in/out: block to bufferunfix */
+{
+	ut_ad(block->page.buf_fix_count > 0);
+
+#ifdef PAGE_ATOMIC_REF_COUNT
+	os_atomic_decrement_uint32(&block->page.buf_fix_count, 1);
+#else
+	mutex_enter(&block->mutex);
+	--block->page.buf_fix_count;
+	mutex_exit(&block->mutex);
+#endif /* PAGE_ATOMIC_REF_COUNT */
+
+#ifdef UNIV_SYNC_DEBUG
+	rw_lock_s_unlock(&block->debug_latch);
+#endif
+}
+
+/******************************************************************//**
+Selects the buffer pool instance responsible for a given page address.
+@return buffer pool */
+UNIV_INLINE
+buf_pool_t*
+buf_pool_get(
+/*==========*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset)	/*!< in: offset of the page within space */
+{
+	/* Ignore the 6 low-order bits of the page number
+	(2log of BUF_READ_AHEAD_AREA (64)): pages of a space that share
+	the remaining bits map to the same instance. */
+	const ulint	area_no = offset >> 6;
+	const ulint	fold = buf_page_address_fold(space, area_no);
+	const ulint	pool_no = fold % srv_buf_pool_instances;
+
+	return(&buf_pool_ptr[pool_no]);
+}
+
+/******************************************************************//**
+Returns the buffer pool instance given its index in buf_pool_ptr[].
+@return buffer pool */
+UNIV_INLINE
+buf_pool_t*
+buf_pool_from_array(
+/*================*/
+	ulint	index)		/*!< in: array index to get
+				buffer pool instance from */
+{
+	ut_ad(index < MAX_BUFFER_POOLS);
+	ut_ad(index < srv_buf_pool_instances);
+	return(&buf_pool_ptr[index]);
+}
+
+/******************************************************************//**
+Looks up the control block of a file page in buf_pool->page_hash.
+@return	block, NULL if not found */
+UNIV_INLINE
+buf_page_t*
+buf_page_hash_get_low(
+/*==================*/
+	buf_pool_t*	buf_pool,/*!< in: buffer pool instance */
+	ulint		space,	/*!< in: space id */
+	ulint		offset,	/*!< in: offset of the page within space */
+	ulint		fold)	/*!< in: buf_page_address_fold(space, offset) */
+{
+	buf_page_t*	bpage;
+
+#ifdef UNIV_SYNC_DEBUG
+	ulint		hash_fold;
+	rw_lock_t*	hash_lock;
+
+	hash_fold = buf_page_address_fold(space, offset);
+	ut_ad(hash_fold == fold);
+
+	hash_lock = hash_get_lock(buf_pool->page_hash, fold);
+	ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX)
+	      || rw_lock_own(hash_lock, RW_LOCK_SHARED));
+#endif /* UNIV_SYNC_DEBUG */
+
+	/* Look for the page in the hash table */
+
+	HASH_SEARCH(hash, buf_pool->page_hash, fold, buf_page_t*, bpage,
+		    ut_ad(bpage->in_page_hash && !bpage->in_zip_hash
+			  && buf_page_in_file(bpage)),
+		    bpage->space == space && bpage->offset == offset);
+	if (bpage) {
+		ut_a(buf_page_in_file(bpage));
+		ut_ad(bpage->in_page_hash);
+		ut_ad(!bpage->in_zip_hash);
+	}
+
+	return(bpage);
+}
+
+/******************************************************************//**
+Looks up the control block of a file page while holding the page_hash
+lock that covers it. The lock is taken in lock_mode if lock is not
+NULL, and in shared mode otherwise. If a real page is found and lock
+is not NULL, the lock stays held, is passed back in *lock and must be
+released by the caller. In all other cases the lock is released here
+before returning and *lock, if given, is left as NULL.
+@return	block, NULL if not found, or watch sentinel (if watch is true) */
+UNIV_INLINE
+buf_page_t*
+buf_page_hash_get_locked(
+/*=====================*/
+					/*!< out: pointer to the bpage,
+					or NULL; if NULL, hash_lock
+					is also NULL. */
+	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
+	ulint		space,		/*!< in: space id */
+	ulint		offset,		/*!< in: page number */
+	rw_lock_t**	lock,		/*!< out: page_hash lock left
+					held for the caller if a page
+					is found, NULL otherwise. If the
+					pointer itself is NULL, the lock
+					is released by this function */
+	ulint		lock_mode,	/*!< in: RW_LOCK_EX or
+					RW_LOCK_SHARED. Ignored if
+					lock == NULL */
+	bool		watch)		/*!< in: if true, return watch
+					sentinel also. */
+{
+	buf_page_t*	bpage = NULL;
+	ulint		fold;
+	rw_lock_t*	hash_lock;
+	ulint		mode = RW_LOCK_SHARED;
+
+	if (lock != NULL) {
+		*lock = NULL;
+		ut_ad(lock_mode == RW_LOCK_EX
+		      || lock_mode == RW_LOCK_SHARED);
+		mode = lock_mode;
+	}
+
+	fold = buf_page_address_fold(space, offset);
+	hash_lock = hash_get_lock(buf_pool->page_hash, fold);
+	/* The caller must not already hold this lock in either mode. */
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)
+	      && !rw_lock_own(hash_lock, RW_LOCK_SHARED));
+#endif /* UNIV_SYNC_DEBUG */
+
+	if (mode == RW_LOCK_SHARED) {
+		rw_lock_s_lock(hash_lock);
+	} else {
+		rw_lock_x_lock(hash_lock);
+	}
+
+	bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
+	/* No page, or only a watch sentinel: never keep the lock. */
+	if (!bpage || buf_pool_watch_is_sentinel(buf_pool, bpage)) {
+		if (!watch) {
+			bpage = NULL;
+		}
+		goto unlock_and_exit;
+	}
+
+	ut_ad(buf_page_in_file(bpage));
+	ut_ad(offset == bpage->offset);
+	ut_ad(space == bpage->space);
+
+	if (lock == NULL) {
+		/* The caller wants us to release the page_hash lock */
+		goto unlock_and_exit;
+	} else {
+		/* To be released by the caller */
+		*lock = hash_lock;
+		goto exit;
+	}
+
+unlock_and_exit:
+	if (mode == RW_LOCK_SHARED) {
+		rw_lock_s_unlock(hash_lock);
+	} else {
+		rw_lock_x_unlock(hash_lock);
+	}
+exit:
+	return(bpage);
+}
+
+/******************************************************************//**
+Returns the buffer block of a file page, NULL if not found. If a block
+is found and lock is not NULL, the page_hash lock is held in lock_mode,
+passed back in *lock, and must be released by the caller. If the page
+is absent, or exists only as a buf_page_t that is not a buf_block_t,
+NULL is returned, the page_hash lock is not held on return, and *lock,
+if given, is NULL.
+@return	block, NULL if not found */
+UNIV_INLINE
+buf_block_t*
+buf_block_hash_get_locked(
+/*=====================*/
+					/*!< out: pointer to the bpage,
+					or NULL; if NULL, hash_lock
+					is also NULL. */
+	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
+	ulint		space,		/*!< in: space id */
+	ulint		offset,		/*!< in: page number */
+	rw_lock_t**	lock,		/*!< out: page_hash lock left
+					held for the caller if a block
+					is found, NULL otherwise. If the
+					pointer itself is NULL, no lock
+					is held when this returns */
+	ulint		lock_mode)	/*!< in: RW_LOCK_EX or
+					RW_LOCK_SHARED. Ignored if
+					lock == NULL */
+{
+	buf_page_t*	bpage = buf_page_hash_get_locked(buf_pool,
+							 space,
+							 offset,
+							 lock,
+							 lock_mode);
+	buf_block_t*	block = buf_page_get_block(bpage);
+
+	if (block) {
+		ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+#ifdef UNIV_SYNC_DEBUG
+		ut_ad(!lock || rw_lock_own(*lock, lock_mode));
+#endif /* UNIV_SYNC_DEBUG */
+		return(block);
+	} else if (bpage) {
+		/* It is not a block, just a bpage. The hash lock is still held
+		only if the caller passed lock, so never touch *lock otherwise. */
+		ut_ad(buf_page_in_file(bpage));
+		if (lock) {
+			if (lock_mode == RW_LOCK_SHARED) {
+				rw_lock_s_unlock(*lock);
+			} else {
+				rw_lock_x_unlock(*lock);
+			}
+			*lock = NULL;
+		}
+		return(NULL);
+	}
+
+	ut_ad(!bpage);
+	ut_ad(lock == NULL || *lock == NULL);
+	return(NULL);
+}
+
+/********************************************************************//**
+Tells whether the page has an entry in the buffer pool hash table.
+
+NOTE: an entry may exist even though the page has not yet been read
+from disk.
+
+@return	TRUE if found in the page hash table */
+UNIV_INLINE
+ibool
+buf_page_peek(
+/*==========*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset)	/*!< in: page number */
+{
+	return(buf_page_hash_get(buf_pool_get(space, offset),
+				 space, offset)
+	       != NULL);
+}
+
+/********************************************************************//**
+Unfixes a page acquired with buf_page_get_zip(). For an uncompressed
+file page the debug latch is released first in sync-debug builds. */
+UNIV_INLINE
+void
+buf_page_release_zip(
+/*=================*/
+	buf_page_t*	bpage)		/*!< in: buffer block */
+{
+	buf_block_t*	block = (buf_block_t*) bpage;
+
+	switch (buf_page_get_state(bpage)) {
+	case BUF_BLOCK_FILE_PAGE:
+#ifdef UNIV_SYNC_DEBUG
+		rw_lock_s_unlock(&block->debug_latch);
+#endif /* UNIV_SYNC_DEBUG */
+		/* Fall through */
+	case BUF_BLOCK_ZIP_PAGE:
+	case BUF_BLOCK_ZIP_DIRTY:
+		buf_block_unfix(block);
+		return;
+
+	case BUF_BLOCK_POOL_WATCH:
+	case BUF_BLOCK_NOT_USED:
+	case BUF_BLOCK_READY_FOR_USE:
+	case BUF_BLOCK_MEMORY:
+	case BUF_BLOCK_REMOVE_HASH:
+		/* These states leave the switch and reach ut_error. */
+		break;
+	}
+
+	ut_error;
+}
+
+/********************************************************************//**
+Releases the latch named by rw_latch, if any, and then unfixes the
+buffer control block. */
+UNIV_INLINE
+void
+buf_page_release(
+/*=============*/
+	buf_block_t*	block,		/*!< in: buffer block */
+	ulint		rw_latch)	/*!< in: RW_S_LATCH, RW_X_LATCH,
+					RW_NO_LATCH */
+{
+	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+
+#ifdef UNIV_SYNC_DEBUG
+	rw_lock_s_unlock(&block->debug_latch);
+#endif /* UNIV_SYNC_DEBUG */
+	if (rw_latch == RW_X_LATCH) {
+		rw_lock_x_unlock(&block->lock);
+	} else if (rw_latch == RW_S_LATCH) {
+		rw_lock_s_unlock(&block->lock);
+	}
+
+	buf_block_unfix(block);
+}
+
+#ifdef UNIV_SYNC_DEBUG
+/*********************************************************************//**
+Registers the latching order level of block->lock, the rw-lock protecting
+the buffer frame, via sync_thread_add_level(). Call this in the debug version
+after successfully latching a page whose latching order level is known. */
+UNIV_INLINE
+void
+buf_block_dbg_add_level(
+/*====================*/
+	buf_block_t*	block,	/*!< in: buffer page
+				where we have acquired latch */
+	ulint		level)	/*!< in: latching order level */
+{
+	sync_thread_add_level(&block->lock, level, FALSE);
+}
+
+#endif /* UNIV_SYNC_DEBUG */
+/********************************************************************//**
+Acquires the buffer pool mutex of every buffer pool instance, one
+instance after another. */
+UNIV_INLINE
+void
+buf_pool_mutex_enter_all(void)
+/*==========================*/
+{
+	/* The instances are visited in ascending array order, from 0 up
+	to srv_buf_pool_instances - 1. */
+	for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
+		buf_pool_t*	buf_pool = buf_pool_from_array(i);
+
+		buf_pool_mutex_enter(buf_pool);
+	}
+}
+
+/********************************************************************//**
+Releases the buffer pool mutex of every buffer pool instance, one
+instance after another. */
+UNIV_INLINE
+void
+buf_pool_mutex_exit_all(void)
+/*=========================*/
+{
+	/* The instances are visited in ascending array order, from 0 up
+	to srv_buf_pool_instances - 1. */
+	for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
+		buf_pool_t*	buf_pool = buf_pool_from_array(i);
+
+		buf_pool_mutex_exit(buf_pool);
+	}
+}
+/*********************************************************************//**
+Fetches the blocks pointer and the size of the nth chunk of a buffer pool.
+@return the blocks pointer stored in the nth chunk. */
+UNIV_INLINE
+buf_block_t*
+buf_get_nth_chunk_block(
+/*====================*/
+	const buf_pool_t* buf_pool,	/*!< in: buffer pool instance */
+	ulint		n,		/*!< in: chunk index, not range-checked */
+	ulint*		chunk_size)	/*!< out: size of the nth chunk */
+{
+	const buf_chunk_t*	nth_chunk = &buf_pool->chunks[n];
+
+	*chunk_size = nth_chunk->size;
+
+	return(nth_chunk->blocks);
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/buf0checksum.h b/storage/innobase/include/buf0checksum.h
new file mode 100644
index 00000000000..cd21781dc6e
--- /dev/null
+++ b/storage/innobase/include/buf0checksum.h
@@ -0,0 +1,88 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0checksum.h
+Buffer pool checksum functions, also linked from /extra/innochecksum.cc
+
+Created Aug 11, 2011 Vasil Dimov
+*******************************************************/
+
+#ifndef buf0checksum_h
+#define buf0checksum_h
+
+#include "univ.i"
+
+#ifndef UNIV_INNOCHECKSUM
+
+#include "buf0types.h"
+
+#endif /* !UNIV_INNOCHECKSUM */
+
+/********************************************************************//**
+Calculates a page CRC32 which is stored to the page when it is written
+to a file. Note that we must be careful to calculate the same value on
+32-bit and 64-bit architectures.
+@return	checksum */
+UNIV_INTERN
+ib_uint32_t
+buf_calc_page_crc32(
+/*================*/
+	const byte*	page);	/*!< in: buffer page */
+
+/********************************************************************//**
+Calculates a page checksum which is stored to the page when it is written
+to a file. Note that we must be careful to calculate the same value on
+32-bit and 64-bit architectures.
+@return	checksum */
+UNIV_INTERN
+ulint
+buf_calc_page_new_checksum(
+/*=======================*/
+	const byte*	page);	/*!< in: buffer page */
+
+/********************************************************************//**
+In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
+looked at the first few bytes of the page. This calculates that old
+checksum.
+NOTE: we must first store the new formula checksum to
+FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
+because this takes that field as an input!
+@return	checksum */
+UNIV_INTERN
+ulint
+buf_calc_page_old_checksum(
+/*=======================*/
+	const byte*	page);	/*!< in: buffer page */
+
+#ifndef UNIV_INNOCHECKSUM
+
+/********************************************************************//**
+Return a printable string describing the checksum algorithm.
+@return	algorithm name */
+UNIV_INTERN
+const char*
+buf_checksum_algorithm_name(
+/*========================*/
+	srv_checksum_algorithm_t	algo);	/*!< in: algorithm */
+
+extern ulong	srv_checksum_algorithm;
+
+#endif /* !UNIV_INNOCHECKSUM */
+
+#endif /* buf0checksum_h */
diff --git a/storage/innobase/include/buf0dblwr.h b/storage/innobase/include/buf0dblwr.h
new file mode 100644
index 00000000000..a62a6400d97
--- /dev/null
+++ b/storage/innobase/include/buf0dblwr.h
@@ -0,0 +1,162 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0dblwr.h
+Doublewrite buffer module
+
+Created 2011/12/19 Inaam Rana
+*******************************************************/
+
+#ifndef buf0dblwr_h
+#define buf0dblwr_h
+
+#include "univ.i"
+#include "ut0byte.h"
+#include "log0log.h"
+#include "log0recv.h"
+
+#ifndef UNIV_HOTBACKUP
+
+/** Doublewrite system */
+extern buf_dblwr_t*	buf_dblwr;
+/** Set to TRUE when the doublewrite buffer is being created */
+extern ibool		buf_dblwr_being_created;
+
+/****************************************************************//**
+Creates the doublewrite buffer to a new InnoDB installation. The header of the
+doublewrite buffer is placed on the trx system header page. */
+UNIV_INTERN
+void
+buf_dblwr_create(void);
+/*==================*/
+
+/****************************************************************//**
+At a database startup initializes the doublewrite buffer memory structure if
+we already have a doublewrite buffer created in the data files. If we are
+upgrading to an InnoDB version which supports multiple tablespaces, then this
+function performs the necessary update operations. If we are in a crash
+recovery, this function loads the pages from double write buffer into memory. */
+void
+buf_dblwr_init_or_load_pages(
+/*=========================*/
+	os_file_t	file,
+	char*		path,
+	bool		load_corrupt_pages);
+
+/****************************************************************//**
+Process the double write buffer pages. */
+void
+buf_dblwr_process(void);
+/*===================*/
+
+/****************************************************************//**
+frees doublewrite buffer. */
+UNIV_INTERN
+void
+buf_dblwr_free(void);
+/*================*/
+/********************************************************************//**
+Updates the doublewrite buffer when an IO request is completed. */
+UNIV_INTERN
+void
+buf_dblwr_update(
+/*=============*/
+	const buf_page_t*	bpage,	/*!< in: buffer block descriptor */
+	buf_flush_t		flush_type);/*!< in: flush type */
+/****************************************************************//**
+Determines if a page number is located inside the doublewrite buffer.
+@return TRUE if the location is inside the two blocks of the
+doublewrite buffer */
+UNIV_INTERN
+ibool
+buf_dblwr_page_inside(
+/*==================*/
+	ulint	page_no);	/*!< in: page number */
+/********************************************************************//**
+Posts a buffer page for writing. If the doublewrite memory buffer is
+full, calls buf_dblwr_flush_buffered_writes and waits for free
+space to appear. */
+UNIV_INTERN
+void
+buf_dblwr_add_to_batch(
+/*====================*/
+	buf_page_t*	bpage);	/*!< in: buffer block to write */
+/********************************************************************//**
+Flushes possible buffered writes from the doublewrite memory buffer to disk,
+and also wakes up the aio thread if simulated aio is used. It is very
+important to call this function after a batch of writes has been posted,
+and also when we may have to wait for a page latch! Otherwise a deadlock
+of threads can occur. */
+UNIV_INTERN
+void
+buf_dblwr_flush_buffered_writes(void);
+/*=================================*/
+/********************************************************************//**
+Writes a page to the doublewrite buffer on disk, sync it, then write
+the page to the datafile and sync the datafile. This function is used
+for single page flushes. If all the buffers allocated for single page
+flushes in the doublewrite buffer are in use we wait here for one to
+become free. We are guaranteed that a slot will become free because any
+thread that is using a slot must also release the slot before leaving
+this function. */
+UNIV_INTERN
+void
+buf_dblwr_write_single_page(
+/*========================*/
+	buf_page_t*	bpage,	/*!< in: buffer block to write */
+	bool		sync);	/*!< in: true if sync IO requested */
+
+/** Doublewrite control struct */
+struct buf_dblwr_t{
+	ib_mutex_t	mutex;	/*!< mutex protecting the first_free
+				field and write_buf */
+	ulint		block1;	/*!< the page number of the first
+				doublewrite block (64 pages) */
+	ulint		block2;	/*!< page number of the second block */
+	ulint		first_free;/*!< first free position in write_buf
+				measured in units of UNIV_PAGE_SIZE */
+	ulint		b_reserved;/*!< number of slots currently reserved
+				for batch flush. */
+	os_event_t	b_event;/*!< event where threads wait for a
+				batch flush to end. */
+	ulint		s_reserved;/*!< number of slots currently
+				reserved for single page flushes. */
+	os_event_t	s_event;/*!< event where threads wait for a
+				single page flush slot. */
+	bool*		in_use;	/*!< flags indicating whether a slot is
+				in use. Only used for single page
+				flushes. */
+	bool		batch_running;/*!< set to true while a batch
+				is being written from the doublewrite
+				buffer. */
+	byte*		write_buf;/*!< write buffer used in writing to the
+				doublewrite buffer, aligned to an
+				address divisible by UNIV_PAGE_SIZE
+				(which is required by Windows aio) */
+	byte*		write_buf_unaligned;/*!< pointer to write_buf,
+				but unaligned */
+	buf_page_t**	buf_block_arr;/*!< array to store pointers to
+				the buffer blocks which have been
+				cached to write_buf */
+};
+
+
+#endif /* UNIV_HOTBACKUP */
+
+#endif
diff --git a/storage/innobase/include/buf0dump.h b/storage/innobase/include/buf0dump.h
new file mode 100644
index 00000000000..c704a8e97e0
--- /dev/null
+++ b/storage/innobase/include/buf0dump.h
@@ -0,0 +1,72 @@
+/*****************************************************************************
+
+Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0dump.h
+Implements a buffer pool dump/load.
+
+Created April 08, 2011 Vasil Dimov
+*******************************************************/
+
+#ifndef buf0dump_h
+#define buf0dump_h
+
+#include "univ.i"
+
+/*****************************************************************//**
+Wakes up the buffer pool dump/load thread and instructs it to start
+a dump. This function is called by MySQL code via buffer_pool_dump_now()
+and it should return immediately because the whole MySQL is frozen during
+its execution. */
+UNIV_INTERN
+void
+buf_dump_start();
+/*============*/
+
+/*****************************************************************//**
+Wakes up the buffer pool dump/load thread and instructs it to start
+a load. This function is called by MySQL code via buffer_pool_load_now()
+and it should return immediately because the whole MySQL is frozen during
+its execution. */
+UNIV_INTERN
+void
+buf_load_start();
+/*============*/
+
+/*****************************************************************//**
+Aborts a currently running buffer pool load. This function is called by
+MySQL code via buffer_pool_load_abort() and it should return immediately
+because the whole MySQL is frozen during its execution. */
+UNIV_INTERN
+void
+buf_load_abort();
+/*============*/
+
+/*****************************************************************//**
+This is the main thread for buffer pool dump/load. It waits for an
+event and when waked up either performs a dump or load and sleeps
+again.
+@return this function does not return, it calls os_thread_exit() */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(buf_dump_thread)(
+/*============================*/
+	void*	arg);				/*!< in: a dummy parameter
+						required by os_thread_create */
+
+#endif /* buf0dump_h */
diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h
new file mode 100644
index 00000000000..f116720574b
--- /dev/null
+++ b/storage/innobase/include/buf0flu.h
@@ -0,0 +1,286 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0flu.h
+The database buffer pool flush algorithm
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef buf0flu_h
+#define buf0flu_h
+
+#include "univ.i"
+#include "ut0byte.h"
+#include "log0log.h"
+#ifndef UNIV_HOTBACKUP
+#include "mtr0types.h"
+#include "buf0types.h"
+
+/** Flag indicating if the page_cleaner is in active state. */
+extern ibool buf_page_cleaner_is_active;
+
+/********************************************************************//**
+Remove a block from the flush list of modified blocks. */
+UNIV_INTERN
+void
+buf_flush_remove(
+/*=============*/
+	buf_page_t*	bpage);	/*!< in: pointer to the block in question */
+/*******************************************************************//**
+Relocates a buffer control block on the flush_list.
+Note that it is assumed that the contents of bpage has already been
+copied to dpage. */
+UNIV_INTERN
+void
+buf_flush_relocate_on_flush_list(
+/*=============================*/
+	buf_page_t*	bpage,	/*!< in/out: control block being moved */
+	buf_page_t*	dpage);	/*!< in/out: destination block */
+/********************************************************************//**
+Updates the flush system data structures when a write is completed. */
+UNIV_INTERN
+void
+buf_flush_write_complete(
+/*=====================*/
+	buf_page_t*	bpage);	/*!< in: pointer to the block in question */
+#endif /* !UNIV_HOTBACKUP */
+/********************************************************************//**
+Initializes a page for writing to the tablespace. */
+UNIV_INTERN
+void
+buf_flush_init_for_writing(
+/*=======================*/
+	byte*	page,		/*!< in/out: page */
+	void*	page_zip_,	/*!< in/out: compressed page, or NULL */
+	lsn_t	newest_lsn);	/*!< in: newest modification lsn
+				to the page */
+#ifndef UNIV_HOTBACKUP
+# if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+/********************************************************************//**
+Writes a flushable page asynchronously from the buffer pool to a file.
+NOTE: buf_pool->mutex and block->mutex must be held upon entering this
+function, and they will be released by this function after flushing.
+This is loosely based on buf_flush_batch() and buf_flush_page().
+@return TRUE if the page was flushed and the mutexes released */
+UNIV_INTERN
+ibool
+buf_flush_page_try(
+/*===============*/
+	buf_pool_t*	buf_pool,	/*!< in/out: buffer pool instance */
+	buf_block_t*	block)		/*!< in/out: buffer control block */
+	__attribute__((nonnull, warn_unused_result));
+# endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+/*******************************************************************//**
+This utility flushes dirty blocks from the end of the flush list of
+all buffer pool instances.
+NOTE: The calling thread is not allowed to own any latches on pages!
+@return true if a batch was queued successfully for each buffer pool
+instance. false if another batch of same type was already running in
+at least one of the buffer pool instances */
+UNIV_INTERN
+bool
+buf_flush_list(
+/*===========*/
+	ulint		min_n,		/*!< in: wished minimum number of blocks
+					flushed (it is not guaranteed that the
+					actual number is that big, though) */
+	lsn_t		lsn_limit,	/*!< in: in the case BUF_FLUSH_LIST all
+					blocks whose oldest_modification is
+					smaller than this should be flushed
+					(if their number does not exceed
+					min_n), otherwise ignored */
+	ulint*		n_processed);	/*!< out: the number of pages
+					which were processed is passed
+					back to caller. Ignored if NULL */
+/******************************************************************//**
+This function picks up a single dirty page from the tail of the LRU
+list, flushes it, removes it from page_hash and LRU list and puts
+it on the free list. It is called from user threads when they are
+unable to find a replaceable page at the tail of the LRU list i.e.:
+when the background LRU flushing in the page_cleaner thread is not
+fast enough to keep pace with the workload.
+@return TRUE if success. */
+UNIV_INTERN
+ibool
+buf_flush_single_page_from_LRU(
+/*===========================*/
+	buf_pool_t*	buf_pool);	/*!< in/out: buffer pool instance */
+/******************************************************************//**
+Waits until a flush batch of the given type ends */
+UNIV_INTERN
+void
+buf_flush_wait_batch_end(
+/*=====================*/
+	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
+	buf_flush_t	type);		/*!< in: BUF_FLUSH_LRU
+					or BUF_FLUSH_LIST */
+/******************************************************************//**
+Waits until a flush batch of the given type ends. This is called by
+a thread that only wants to wait for a flush to end but doesn't do
+any flushing itself. */
+UNIV_INTERN
+void
+buf_flush_wait_batch_end_wait_only(
+/*===============================*/
+	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
+	buf_flush_t	type);		/*!< in: BUF_FLUSH_LRU
+					or BUF_FLUSH_LIST */
+/********************************************************************//**
+This function should be called at a mini-transaction commit, if a page was
+modified in it. Puts the block to the list of modified blocks, if it is not
+already in it. */
+UNIV_INLINE
+void
+buf_flush_note_modification(
+/*========================*/
+	buf_block_t*	block,	/*!< in: block which is modified */
+	mtr_t*		mtr);	/*!< in: mtr */
+/********************************************************************//**
+This function should be called when recovery has modified a buffer page. */
+UNIV_INLINE
+void
+buf_flush_recv_note_modification(
+/*=============================*/
+	buf_block_t*	block,		/*!< in: block which is modified */
+	lsn_t		start_lsn,	/*!< in: start lsn of the first mtr in a
+					set of mtr's */
+	lsn_t		end_lsn);	/*!< in: end lsn of the last mtr in the
+					set of mtr's */
+/********************************************************************//**
+Returns TRUE if the file page block is immediately suitable for replacement,
+i.e., transition FILE_PAGE => NOT_USED allowed.
+@return	TRUE if can replace immediately */
+UNIV_INTERN
+ibool
+buf_flush_ready_for_replace(
+/*========================*/
+	buf_page_t*	bpage);	/*!< in: buffer control block, must be
+				buf_page_in_file(bpage) and in the LRU list */
+/******************************************************************//**
+page_cleaner thread tasked with flushing dirty pages from the buffer
+pools. As of now we'll have only one instance of this thread.
+@return a dummy parameter */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(buf_flush_page_cleaner_thread)(
+/*==========================================*/
+	void*	arg);		/*!< in: a dummy parameter required by
+				os_thread_create */
+/*********************************************************************//**
+Clears up tail of the LRU lists:
+* Put replaceable pages at the tail of LRU to the free list
+* Flush dirty pages at the tail of LRU to the disk
+The depth to which we scan each buffer pool is controlled by dynamic
+config parameter innodb_LRU_scan_depth.
+@return total pages flushed */
+UNIV_INTERN
+ulint
+buf_flush_LRU_tail(void);
+/*====================*/
+/*********************************************************************//**
+Wait for any possible LRU flushes that are in progress to end. */
+UNIV_INTERN
+void
+buf_flush_wait_LRU_batch_end(void);
+/*==============================*/
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/******************************************************************//**
+Validates the flush list.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+buf_flush_validate(
+/*===============*/
+	buf_pool_t*	buf_pool);
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+
+/********************************************************************//**
+Initialize the red-black tree to speed up insertions into the flush_list
+during recovery process. Should be called at the start of recovery
+process before any page has been read/written. */
+UNIV_INTERN
+void
+buf_flush_init_flush_rbt(void);
+/*==========================*/
+
+/********************************************************************//**
+Frees up the red-black tree. */
+UNIV_INTERN
+void
+buf_flush_free_flush_rbt(void);
+/*==========================*/
+
+/********************************************************************//**
+Writes a flushable page asynchronously from the buffer pool to a file.
+NOTE: in simulated aio we must call
+os_aio_simulated_wake_handler_threads after we have posted a batch of
+writes! NOTE: buf_pool->mutex and buf_page_get_mutex(bpage) must be
+held upon entering this function, and they will be released by this
+function if it returns true.
+@return TRUE if the page was flushed */
+UNIV_INTERN
+bool
+buf_flush_page(
+/*===========*/
+	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
+	buf_page_t*	bpage,		/*!< in: buffer control block */
+	buf_flush_t	flush_type,	/*!< in: type of flush */
+	bool		sync);		/*!< in: true if sync IO request */
+/********************************************************************//**
+Returns true if the block is modified and ready for flushing.
+@return	true if can flush immediately */
+UNIV_INTERN
+bool
+buf_flush_ready_for_flush(
+/*======================*/
+	buf_page_t*	bpage,	/*!< in: buffer control block, must be
+				buf_page_in_file(bpage) */
+	buf_flush_t	flush_type)/*!< in: type of flush */
+	__attribute__((warn_unused_result));
+
+#ifdef UNIV_DEBUG
+/******************************************************************//**
+Check if there are any dirty pages that belong to a space id in the flush
+list in a particular buffer pool.
+@return	number of dirty pages present in a single buffer pool */
+UNIV_INTERN
+ulint
+buf_pool_get_dirty_pages_count(
+/*===========================*/
+	buf_pool_t*	buf_pool,	/*!< in: buffer pool */
+	ulint		id);		/*!< in: space id to check */
+/******************************************************************//**
+Check if there are any dirty pages that belong to a space id in the flush list.
+@return	count of dirty pages present in all the buffer pools */
+UNIV_INTERN
+ulint
+buf_flush_get_dirty_pages_count(
+/*============================*/
+	ulint		id);		/*!< in: space id to check */
+#endif /* UNIV_DEBUG */
+
+#endif /* !UNIV_HOTBACKUP */
+
+#ifndef UNIV_NONINL
+#include "buf0flu.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/buf0flu.ic b/storage/innobase/include/buf0flu.ic
new file mode 100644
index 00000000000..a763cd115fe
--- /dev/null
+++ b/storage/innobase/include/buf0flu.ic
@@ -0,0 +1,139 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0flu.ic
+The database buffer pool flush algorithm
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef UNIV_HOTBACKUP
+#include "buf0buf.h"
+#include "mtr0mtr.h"
+#include "srv0srv.h"
+
+/********************************************************************//**
+Inserts a modified block into the flush list. */
+UNIV_INTERN
+void
+buf_flush_insert_into_flush_list(
+/*=============================*/
+	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
+	buf_block_t*	block,		/*!< in/out: block which is modified */
+	lsn_t		lsn);		/*!< in: oldest modification */
+/********************************************************************//**
+Inserts a modified block into the flush list in the right sorted position.
+This function is used by recovery, because there the modifications do not
+necessarily come in the order of lsn's. */
+UNIV_INTERN
+void
+buf_flush_insert_sorted_into_flush_list(
+/*====================================*/
+	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
+	buf_block_t*	block,		/*!< in/out: block which is modified */
+	lsn_t		lsn);		/*!< in: oldest modification */
+
+/********************************************************************//**
+Notes that a mini-transaction has modified a page. To be invoked at mtr
+commit for each block the mtr changed; the block is put to the list of
+modified blocks unless it is already there. */
+UNIV_INLINE
+void
+buf_flush_note_modification(
+/*========================*/
+	buf_block_t*	block,	/*!< in: block which is modified */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	buf_pool_t*	buf_pool = buf_pool_from_block(block);
+
+	ut_ad(!srv_read_only_mode);
+	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+	ut_ad(block->page.buf_fix_count > 0);
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(rw_lock_own(&block->lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+	ut_ad(!buf_pool_mutex_own(buf_pool));
+	ut_ad(!buf_flush_list_mutex_own(buf_pool));
+	ut_ad(!mtr->made_dirty || log_flush_order_mutex_own());
+
+	ut_ad(mtr->start_lsn != 0);
+	ut_ad(mtr->modifications);
+
+	mutex_enter(&block->mutex);
+
+	ut_ad(block->page.newest_modification <= mtr->end_lsn);
+	block->page.newest_modification = mtr->end_lsn;
+
+	if (block->page.oldest_modification != 0) {
+		ut_ad(block->page.oldest_modification <= mtr->start_lsn);
+	} else {
+		/* No oldest_modification yet: insert into the flush list. */
+		ut_a(mtr->made_dirty);
+		ut_ad(log_flush_order_mutex_own());
+		buf_flush_insert_into_flush_list(
+			buf_pool, block, mtr->start_lsn);
+	}
+
+	mutex_exit(&block->mutex);
+	srv_stats.buf_pool_write_requests.inc();
+}
+
+/********************************************************************//**
+Notes that recovery has modified a buffer page; puts the block to the
+flush list in sorted position unless oldest_modification is already set. */
+UNIV_INLINE
+void
+buf_flush_recv_note_modification(
+/*=============================*/
+	buf_block_t*	block,		/*!< in: block which is modified */
+	lsn_t		start_lsn,	/*!< in: start lsn of the first mtr
+					in a set of mtr's */
+	lsn_t		end_lsn)	/*!< in: end lsn of the last mtr
+					in the set of mtr's */
+{
+	buf_pool_t*	buf_pool = buf_pool_from_block(block);
+
+	ut_ad(!srv_read_only_mode);
+	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+	ut_ad(block->page.buf_fix_count > 0);
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(rw_lock_own(&block->lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+	ut_ad(!buf_pool_mutex_own(buf_pool));
+	ut_ad(!buf_flush_list_mutex_own(buf_pool));
+	ut_ad(log_flush_order_mutex_own());
+
+	ut_ad(start_lsn != 0);
+	ut_ad(block->page.newest_modification <= end_lsn);
+
+	mutex_enter(&block->mutex);
+	block->page.newest_modification = end_lsn;
+
+	if (block->page.oldest_modification != 0) {
+		ut_ad(block->page.oldest_modification <= start_lsn);
+	} else {
+		buf_flush_insert_sorted_into_flush_list(
+			buf_pool, block, start_lsn);
+	}
+
+	mutex_exit(&block->mutex);
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
new file mode 100644
index 00000000000..ecdaef685a1
--- /dev/null
+++ b/storage/innobase/include/buf0lru.h
@@ -0,0 +1,310 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0lru.h
+The database buffer pool LRU replacement algorithm
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef buf0lru_h
+#define buf0lru_h
+
+#include "univ.i"
+#ifndef UNIV_HOTBACKUP
+#include "ut0byte.h"
+#include "buf0types.h"
+
+// Forward declaration
+struct trx_t;
+
+/******************************************************************//**
+Returns TRUE if less than 25 % of the buffer pool is available. This can be
+used in heuristics to prevent huge transactions eating up the whole buffer
+pool for their locks.
+@return	TRUE if less than 25 % of buffer pool left */
+UNIV_INTERN
+ibool
+buf_LRU_buf_pool_running_out(void);
+/*==============================*/
+
+/*#######################################################################
+These are low-level functions
+#########################################################################*/
+
+/** Minimum LRU list length for which the LRU_old pointer is defined */
+#define BUF_LRU_OLD_MIN_LEN	512	/* 8 megabytes of 16k pages */
+
+/******************************************************************//**
+Flushes all dirty pages or removes all pages belonging
+to a given tablespace. A PROBLEM: if readahead is being started, what
+guarantees that it will not try to read in pages after this operation
+has completed? */
+UNIV_INTERN
+void
+buf_LRU_flush_or_remove_pages(
+/*==========================*/
+	ulint		id,		/*!< in: space id */
+	buf_remove_t	buf_remove,	/*!< in: remove or flush strategy */
+	const trx_t*	trx);		/*!< to check if the operation must
+					be interrupted */
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/********************************************************************//**
+Insert a compressed block into buf_pool->zip_clean in the LRU order. */
+UNIV_INTERN
+void
+buf_LRU_insert_zip_clean(
+/*=====================*/
+	buf_page_t*	bpage);	/*!< in: pointer to the block in question */
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+
+/******************************************************************//**
+Try to free a block.  If bpage is a descriptor of a compressed-only
+page, the descriptor object will be freed as well.
+
+NOTE: If this function returns true, it will temporarily
+release buf_pool->mutex.  Furthermore, the page frame will no longer be
+accessible via bpage.
+
+The caller must hold buf_pool->mutex and must not hold any
+buf_page_get_mutex() when calling this function.
+@return true if freed, false otherwise. */
+UNIV_INTERN
+bool
+buf_LRU_free_page(
+/*==============*/
+	buf_page_t*	bpage,	/*!< in: block to be freed */
+	bool		zip)	/*!< in: true if should remove also the
+				compressed page of an uncompressed page */
+	__attribute__((nonnull));
+/******************************************************************//**
+Try to free a replaceable block.
+@return	TRUE if found and freed */
+UNIV_INTERN
+ibool
+buf_LRU_scan_and_free_block(
+/*========================*/
+	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
+	ibool		scan_all)	/*!< in: scan whole LRU list
+					if TRUE, otherwise scan only
+					'old' blocks. */
+	__attribute__((nonnull,warn_unused_result));
+/******************************************************************//**
+Returns a free block from the buf_pool.  The block is taken off the
+free list.  If it is empty, returns NULL.
+@return	a free control block, or NULL if the free list is empty */
+UNIV_INTERN
+buf_block_t*
+buf_LRU_get_free_only(
+/*==================*/
+	buf_pool_t*	buf_pool);	/*!< buffer pool instance */
+/******************************************************************//**
+Returns a free block from the buf_pool. The block is taken off the
+free list. If it is empty, blocks are moved from the end of the
+LRU list to the free list.
+This function is called from a user thread when it needs a clean
+block to read in a page. Note that we only ever get a block from
+the free list. Even when we flush a page or find a page in LRU scan
+we put it to free list to be used.
+* iteration 0:
+  * get a block from free list, success:done
+  * if there is an LRU flush batch in progress:
+    * wait for batch to end: retry free list
+  * if buf_pool->try_LRU_scan is set
+    * scan LRU up to srv_LRU_scan_depth to find a clean block
+    * the above will put the block on free list
+    * success:retry the free list
+  * flush one dirty page from tail of LRU to disk
+    * the above will put the block on free list
+    * success: retry the free list
+* iteration 1:
+  * same as iteration 0 except:
+    * scan whole LRU list
+    * scan LRU list even if buf_pool->try_LRU_scan is not set
+* iteration > 1:
+  * same as iteration 1 but sleep 100ms
+@return	the free control block, in state BUF_BLOCK_READY_FOR_USE */
+UNIV_INTERN
+buf_block_t*
+buf_LRU_get_free_block(
+/*===================*/
+	buf_pool_t*	buf_pool)	/*!< in/out: buffer pool instance */
+	__attribute__((nonnull,warn_unused_result));
+/******************************************************************//**
+Determines if the unzip_LRU list should be used for evicting a victim
+instead of the general LRU list.
+@return	TRUE if should use unzip_LRU */
+UNIV_INTERN
+ibool
+buf_LRU_evict_from_unzip_LRU(
+/*=========================*/
+	buf_pool_t*	buf_pool);	/*!< buffer pool instance */
+/******************************************************************//**
+Puts a block back to the free list. */
+UNIV_INTERN
+void
+buf_LRU_block_free_non_file_page(
+/*=============================*/
+	buf_block_t*	block);	/*!< in: block, must not contain a file page */
+/******************************************************************//**
+Adds a block to the LRU list. Please make sure that the zip_size is
+already set into the page zip when invoking the function, so that we
+can get correct zip_size from the buffer page when adding a block
+into LRU */
+UNIV_INTERN
+void
+buf_LRU_add_block(
+/*==============*/
+	buf_page_t*	bpage,	/*!< in: control block */
+	ibool		old);	/*!< in: TRUE if should be put to the old
+				blocks in the LRU list, else put to the
+				start; if the LRU list is very short, added to
+				the start regardless of this parameter */
+/******************************************************************//**
+Adds a block to the LRU list of decompressed zip pages. */
+UNIV_INTERN
+void
+buf_unzip_LRU_add_block(
+/*====================*/
+	buf_block_t*	block,	/*!< in: control block */
+	ibool		old);	/*!< in: TRUE if should be put to the end
+				of the list, else put to the start */
+/******************************************************************//**
+Moves a block to the start of the LRU list. */
+UNIV_INTERN
+void
+buf_LRU_make_block_young(
+/*=====================*/
+	buf_page_t*	bpage);	/*!< in: control block */
+/******************************************************************//**
+Moves a block to the end of the LRU list. */
+UNIV_INTERN
+void
+buf_LRU_make_block_old(
+/*===================*/
+	buf_page_t*	bpage);	/*!< in: control block */
+/**********************************************************************//**
+Updates buf_pool->LRU_old_ratio.
+@return	updated old_pct */
+UNIV_INTERN
+ulint
+buf_LRU_old_ratio_update(
+/*=====================*/
+	uint	old_pct,/*!< in: Reserve this percentage of
+			the buffer pool for "old" blocks. */
+	ibool	adjust);/*!< in: TRUE=adjust the LRU list;
+			FALSE=just assign buf_pool->LRU_old_ratio
+			during the initialization of InnoDB */
+/********************************************************************//**
+Update the historical stats that we are collecting for LRU eviction
+policy at the end of each interval. */
+UNIV_INTERN
+void
+buf_LRU_stat_update(void);
+/*=====================*/
+
+/******************************************************************//**
+Remove one page from LRU list and put it to free list */
+UNIV_INTERN
+void
+buf_LRU_free_one_page(
+/*==================*/
+	buf_page_t*	bpage)	/*!< in/out: block, must contain a file page and
+				be in a state where it can be freed; there
+				may or may not be a hash index to the page */
+	__attribute__((nonnull));
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/**********************************************************************//**
+Validates the LRU list.
+@return	TRUE */
+UNIV_INTERN
+ibool
+buf_LRU_validate(void);
+/*==================*/
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/**********************************************************************//**
+Prints the LRU list. */
+UNIV_INTERN
+void
+buf_LRU_print(void);
+/*===============*/
+#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
+
+/** @name Heuristics for detecting index scan @{ */
+/** The denominator of buf_pool->LRU_old_ratio. */
+#define BUF_LRU_OLD_RATIO_DIV	1024
+/** Maximum value of buf_pool->LRU_old_ratio.
+@see buf_LRU_old_adjust_len
+@see buf_LRU_old_ratio_update */
+#define BUF_LRU_OLD_RATIO_MAX	BUF_LRU_OLD_RATIO_DIV
+/** Minimum value of buf_pool->LRU_old_ratio.
+@see buf_LRU_old_adjust_len
+@see buf_LRU_old_ratio_update
+The minimum must exceed
+(BUF_LRU_OLD_TOLERANCE + 5) * BUF_LRU_OLD_RATIO_DIV / BUF_LRU_OLD_MIN_LEN. */
+#define BUF_LRU_OLD_RATIO_MIN	51
+
+#if BUF_LRU_OLD_RATIO_MIN >= BUF_LRU_OLD_RATIO_MAX
+# error "BUF_LRU_OLD_RATIO_MIN >= BUF_LRU_OLD_RATIO_MAX"
+#endif
+#if BUF_LRU_OLD_RATIO_MAX > BUF_LRU_OLD_RATIO_DIV
+# error "BUF_LRU_OLD_RATIO_MAX > BUF_LRU_OLD_RATIO_DIV"
+#endif
+
+/** Move blocks to "new" LRU list only if the first access was at
+least this many milliseconds ago.  Not protected by any mutex or latch. */
+extern uint	buf_LRU_old_threshold_ms;
+/* @} */
+
+/** @brief Statistics for selecting the LRU list for eviction.
+
+These statistics are not 'of' LRU but 'for' LRU.  We keep count of I/O
+and page_zip_decompress() operations.  Based on the statistics we decide
+if we want to evict from buf_pool->unzip_LRU or buf_pool->LRU. */
+struct buf_LRU_stat_t
+{
+	ulint	io;	/**< Counter of buffer pool I/O operations. */
+	ulint	unzip;	/**< Counter of page_zip_decompress() operations. */
+};
+
+/** Current operation counters.  Not protected by any mutex.
+Cleared by buf_LRU_stat_update(). */
+extern buf_LRU_stat_t	buf_LRU_stat_cur;
+
+/** Running sum of past values of buf_LRU_stat_cur.
+Updated by buf_LRU_stat_update().  Protected by buf_pool->mutex. */
+extern buf_LRU_stat_t	buf_LRU_stat_sum;
+
+/********************************************************************//**
+Increments the I/O counter in buf_LRU_stat_cur. */
+#define buf_LRU_stat_inc_io() buf_LRU_stat_cur.io++
+/********************************************************************//**
+Increments the page_zip_decompress() counter in buf_LRU_stat_cur. */
+#define buf_LRU_stat_inc_unzip() buf_LRU_stat_cur.unzip++
+
+#ifndef UNIV_NONINL
+#include "buf0lru.ic"
+#endif
+
+#endif /* !UNIV_HOTBACKUP */
+
+#endif
diff --git a/storage/innobase/include/buf0lru.ic b/storage/innobase/include/buf0lru.ic
new file mode 100644
index 00000000000..6e0da7a2588
--- /dev/null
+++ b/storage/innobase/include/buf0lru.ic
@@ -0,0 +1,25 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0lru.ic
+The database buffer replacement algorithm
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
diff --git a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h
new file mode 100644
index 00000000000..d2a1f264ff5
--- /dev/null
+++ b/storage/innobase/include/buf0rea.h
@@ -0,0 +1,177 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0rea.h
+The database buffer read
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef buf0rea_h
+#define buf0rea_h
+
+#include "univ.i"
+#include "buf0types.h"
+
+/********************************************************************//**
+High-level function which reads a page asynchronously from a file to the
+buffer buf_pool if it is not already there. Sets the io_fix flag and sets
+an exclusive lock on the buffer frame. The flag is cleared and the x-lock
+released by the i/o-handler thread.
+@return TRUE if page has been read in, FALSE in case of failure */
+UNIV_INTERN
+ibool
+buf_read_page(
+/*==========*/
+	ulint	space,	/*!< in: space id */
+	ulint	zip_size,/*!< in: compressed page size in bytes, or 0 */
+	ulint	offset);/*!< in: page number */
+/********************************************************************//**
+High-level function which reads a page asynchronously from a file to the
+buffer buf_pool if it is not already there. Sets the io_fix flag and sets
+an exclusive lock on the buffer frame. The flag is cleared and the x-lock
+released by the i/o-handler thread.
+@return TRUE if page has been read in, FALSE in case of failure */
+UNIV_INTERN
+ibool
+buf_read_page_async(
+/*================*/
+	ulint	space,	/*!< in: space id */
+	ulint	offset);/*!< in: page number */
+/********************************************************************//**
+Applies a random read-ahead in buf_pool if there are at least a threshold
+value of accessed pages from the random read-ahead area. Does not read any
+page, not even the one at the position (space, offset), if the read-ahead
+mechanism is not activated. NOTE 1: the calling thread may own latches on
+pages: to avoid deadlocks this function must be written such that it cannot
+end up waiting for these latches! NOTE 2: the calling thread must want
+access to the page given: this rule is set to prevent unintended read-aheads
+performed by ibuf routines, a situation which could result in a deadlock if
+the OS does not support asynchronous i/o.
+@return number of page read requests issued.
+NOTE: if we read ibuf pages, it may happen that the page at the
+given page number does not get read even if we return a positive
+value! */
+UNIV_INTERN
+ulint
+buf_read_ahead_random(
+/*==================*/
+	ulint	space,		/*!< in: space id */
+	ulint	zip_size,	/*!< in: compressed page size in bytes,
+				or 0 */
+	ulint	offset,		/*!< in: page number of a page which
+				the current thread wants to access */
+	ibool	inside_ibuf);	/*!< in: TRUE if we are inside ibuf
+				routine */
+/********************************************************************//**
+Applies linear read-ahead if in the buf_pool the page is a border page of
+a linear read-ahead area and all the pages in the area have been accessed.
+Does not read any page if the read-ahead mechanism is not activated. Note
+that the algorithm looks at the 'natural' adjacent successor and
+predecessor of the page, which on the leaf level of a B-tree are the next
+and previous page in the chain of leaves. To know these, the page specified
+in (space, offset) must already be present in the buf_pool. Thus, the
+natural way to use this function is to call it when a page in the buf_pool
+is accessed the first time, calling this function just after it has been
+bufferfixed.
+NOTE 1: as this function looks at the natural predecessor and successor
+fields on the page, what happens, if these are not initialized to any
+sensible value? No problem, before applying read-ahead we check that the
+area to read is within the span of the space, if not, read-ahead is not
+applied. An uninitialized value may result in a useless read operation, but
+only very improbably.
+NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
+function must be written such that it cannot end up waiting for these
+latches!
+NOTE 3: the calling thread must want access to the page given: this rule is
+set to prevent unintended read-aheads performed by ibuf routines, a situation
+which could result in a deadlock if the OS does not support asynchronous io.
+@return	number of page read requests issued */
+UNIV_INTERN
+ulint
+buf_read_ahead_linear(
+/*==================*/
+	ulint	space,		/*!< in: space id */
+	ulint	zip_size,	/*!< in: compressed page size in bytes, or 0 */
+	ulint	offset,		/*!< in: page number; see NOTE 3 above */
+	ibool	inside_ibuf);	/*!< in: TRUE if we are inside ibuf routine */
+/********************************************************************//**
+Issues read requests for pages which the ibuf module wants to read in, in
+order to contract the insert buffer tree. Technically, this function is like
+a read-ahead function. */
+UNIV_INTERN
+void
+buf_read_ibuf_merge_pages(
+/*======================*/
+	bool		sync,		/*!< in: true if the caller
+					wants this function to wait
+					for the highest address page
+					to get read in, before this
+					function returns */
+	const ulint*	space_ids,	/*!< in: array of space ids */
+	const ib_int64_t* space_versions,/*!< in: the spaces must have
+					this version number
+					(timestamp), otherwise we
+					discard the read; we use this
+					to cancel reads if DISCARD +
+					IMPORT may have changed the
+					tablespace size */
+	const ulint*	page_nos,	/*!< in: array of page numbers
+					to read, with the highest page
+					number the last in the
+					array */
+	ulint		n_stored);	/*!< in: number of elements
+					in the arrays */
+/********************************************************************//**
+Issues read requests for pages which recovery wants to read in. */
+UNIV_INTERN
+void
+buf_read_recv_pages(
+/*================*/
+	ibool		sync,		/*!< in: TRUE if the caller
+					wants this function to wait
+					for the highest address page
+					to get read in, before this
+					function returns */
+	ulint		space,		/*!< in: space id */
+	ulint		zip_size,	/*!< in: compressed page size in
+					bytes, or 0 */
+	const ulint*	page_nos,	/*!< in: array of page numbers
+					to read, with the highest page
+					number the last in the
+					array */
+	ulint		n_stored);	/*!< in: number of page numbers
+					in the array */
+
+/** The size in pages of the area which the read-ahead algorithms read if
+invoked */
+#define	BUF_READ_AHEAD_AREA(b)					\
+	ut_min(64, ut_2_power_up((b)->curr_size / 32))
+
+/** @name Modes used in read-ahead @{ */
+/** read only pages belonging to the insert buffer tree */
+#define BUF_READ_IBUF_PAGES_ONLY	131
+/** read any page */
+#define BUF_READ_ANY_PAGE		132
+/** read any page, but ignore (return an error) if a page does not exist
+instead of crashing like BUF_READ_ANY_PAGE does */
+#define BUF_READ_IGNORE_NONEXISTENT_PAGES 1024
+/* @} */
+
+#endif
diff --git a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h
new file mode 100644
index 00000000000..11bbc9b5c8a
--- /dev/null
+++ b/storage/innobase/include/buf0types.h
@@ -0,0 +1,120 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0types.h
+The database buffer pool global types for the directory
+
+Created 11/17/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef buf0types_h
+#define buf0types_h
+
+#if defined(INNODB_PAGE_ATOMIC_REF_COUNT) && defined(HAVE_ATOMIC_BUILTINS)
+#define PAGE_ATOMIC_REF_COUNT
+#endif /* INNODB_PAGE_ATOMIC_REF_COUNT && HAVE_ATOMIC_BUILTINS */
+
+/** Buffer page (uncompressed or compressed) */
+struct buf_page_t;
+/** Buffer block for which an uncompressed page exists */
+struct buf_block_t;
+/** Buffer pool chunk comprising buf_block_t */
+struct buf_chunk_t;
+/** Buffer pool comprising buf_chunk_t */
+struct buf_pool_t;
+/** Buffer pool statistics struct */
+struct buf_pool_stat_t;
+/** Buffer pool buddy statistics struct */
+struct buf_buddy_stat_t;
+/** Doublewrite memory struct */
+struct buf_dblwr_t;
+
+/** A buffer frame. @see page_t */
+typedef	byte	buf_frame_t;
+
+/** Flags for flush types */
+enum buf_flush_t {
+	BUF_FLUSH_LRU = 0,		/*!< flush via the LRU list */
+	BUF_FLUSH_LIST,			/*!< flush via the flush list
+					of dirty blocks */
+	BUF_FLUSH_SINGLE_PAGE,		/*!< flush via the LRU list
+					but only a single page */
+	BUF_FLUSH_N_TYPES		/*!< index of last element + 1 */
+};
+
+/** Algorithm to remove the pages for a tablespace from the buffer pool.
+See buf_LRU_flush_or_remove_pages(). */
+enum buf_remove_t {
+	BUF_REMOVE_ALL_NO_WRITE,	/*!< Remove all pages from the buffer
+					pool, don't write or sync to disk */
+	BUF_REMOVE_FLUSH_NO_WRITE,	/*!< Remove only from the flush list,
+					don't write or sync to disk */
+	BUF_REMOVE_FLUSH_WRITE		/*!< Flush dirty pages to disk only,
+					don't remove from the buffer pool */
+};
+
+/** Flags for io_fix types */
+enum buf_io_fix {
+	BUF_IO_NONE = 0,		/**< no pending I/O */
+	BUF_IO_READ,			/**< read pending */
+	BUF_IO_WRITE,			/**< write pending */
+	BUF_IO_PIN			/**< disallow relocation of
+					block and its removal from
+					the flush_list */
+};
+
+/** Alternatives for srv_checksum_algorithm, changed by setting
+innodb_checksum_algorithm; STRICT_ values only allow what they write */
+enum srv_checksum_algorithm_t {
+	SRV_CHECKSUM_ALGORITHM_CRC32,		/*!< Write crc32, allow crc32,
+						innodb or none when reading */
+	SRV_CHECKSUM_ALGORITHM_STRICT_CRC32,	/*!< Write crc32, allow crc32
+						when reading */
+	SRV_CHECKSUM_ALGORITHM_INNODB,		/*!< Write innodb, allow crc32,
+						innodb or none when reading */
+	SRV_CHECKSUM_ALGORITHM_STRICT_INNODB,	/*!< Write innodb, allow
+						innodb when reading */
+	SRV_CHECKSUM_ALGORITHM_NONE,		/*!< Write none, allow crc32,
+						innodb or none when reading */
+	SRV_CHECKSUM_ALGORITHM_STRICT_NONE	/*!< Write none, allow none
+						when reading */
+};
+
+/** Parameters of binary buddy system for compressed pages (buf0buddy.h) */
+/* @{ */
+/** Zip shift value for the smallest page size */
+#define BUF_BUDDY_LOW_SHIFT	UNIV_ZIP_SIZE_SHIFT_MIN
+
+/** Smallest buddy page size */
+#define BUF_BUDDY_LOW		(1U << BUF_BUDDY_LOW_SHIFT)
+
+/** Actual number of buddy sizes based on current page size */
+#define BUF_BUDDY_SIZES		(UNIV_PAGE_SIZE_SHIFT - BUF_BUDDY_LOW_SHIFT)
+
+/** Maximum number of buddy sizes based on the max page size */
+#define BUF_BUDDY_SIZES_MAX	(UNIV_PAGE_SIZE_SHIFT_MAX	\
+				- BUF_BUDDY_LOW_SHIFT)
+
+/** twice the maximum block size of the buddy system;
+the underlying memory is aligned by this amount:
+this must be equal to UNIV_PAGE_SIZE */
+#define BUF_BUDDY_HIGH	(BUF_BUDDY_LOW << BUF_BUDDY_SIZES)
+/* @} */
+
+#endif /* buf0types.h */
diff --git a/storage/innobase/include/data0data.h b/storage/innobase/include/data0data.h
new file mode 100644
index 00000000000..a548c7b89b3
--- /dev/null
+++ b/storage/innobase/include/data0data.h
@@ -0,0 +1,536 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/data0data.h
+SQL data field and tuple
+
+Created 5/30/1994 Heikki Tuuri
+*************************************************************************/
+
+#ifndef data0data_h
+#define data0data_h
+
+#include "univ.i"
+
+#include "data0types.h"
+#include "data0type.h"
+#include "mem0mem.h"
+#include "dict0types.h"
+
+/** Storage for overflow data in a big record, that is, a clustered
+index record which needs external storage of data fields */
+struct big_rec_t;
+
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Gets pointer to the type struct of SQL data field.
+@return	pointer to the type struct */
+UNIV_INLINE
+dtype_t*
+dfield_get_type(
+/*============*/
+	const dfield_t*	field)	/*!< in: SQL data field */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Gets pointer to the data in a field.
+@return	pointer to data */
+UNIV_INLINE
+void*
+dfield_get_data(
+/*============*/
+	const dfield_t* field)	/*!< in: field */
+	__attribute__((nonnull, warn_unused_result));
+#else /* UNIV_DEBUG */
+# define dfield_get_type(field) (&(field)->type)
+# define dfield_get_data(field) ((field)->data)
+#endif /* UNIV_DEBUG */
+/*********************************************************************//**
+Sets the type struct of SQL data field. */
+UNIV_INLINE
+void
+dfield_set_type(
+/*============*/
+	dfield_t*	field,	/*!< in: SQL data field */
+	const dtype_t*	type)	/*!< in: pointer to data type struct */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Gets length of field data.
+@return	length of data; UNIV_SQL_NULL if SQL null data */
+UNIV_INLINE
+ulint
+dfield_get_len(
+/*===========*/
+	const dfield_t* field)	/*!< in: field */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Sets length in a field and clears the "external storage" flag. */
+UNIV_INLINE
+void
+dfield_set_len(
+/*===========*/
+	dfield_t*	field,	/*!< in: field */
+	ulint		len)	/*!< in: length or UNIV_SQL_NULL */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Determines if a field is SQL NULL
+@return	nonzero if SQL null data */
+UNIV_INLINE
+ulint
+dfield_is_null(
+/*===========*/
+	const dfield_t* field)	/*!< in: field */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Determines if a field is externally stored
+@return	nonzero if externally stored */
+UNIV_INLINE
+ulint
+dfield_is_ext(
+/*==========*/
+	const dfield_t* field)	/*!< in: field */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Sets the "external storage" flag */
+UNIV_INLINE
+void
+dfield_set_ext(
+/*===========*/
+	dfield_t*	field)	/*!< in/out: field */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Sets pointer to the data and length in a field; clears the ext flag. */
+UNIV_INLINE
+void
+dfield_set_data(
+/*============*/
+	dfield_t*	field,	/*!< in: field */
+	const void*	data,	/*!< in: data */
+	ulint		len)	/*!< in: length or UNIV_SQL_NULL */
+	__attribute__((nonnull(1)));
+/*********************************************************************//**
+Sets a data field to SQL NULL. */
+UNIV_INLINE
+void
+dfield_set_null(
+/*============*/
+	dfield_t*	field)	/*!< in/out: field */
+	__attribute__((nonnull));
+/**********************************************************************//**
+Writes an SQL null field full of zeros. */
+UNIV_INLINE
+void
+data_write_sql_null(
+/*================*/
+	byte*	data,	/*!< in: pointer to a buffer of size len */
+	ulint	len)	/*!< in: SQL null size in bytes */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Copies the data and len fields. */
+UNIV_INLINE
+void
+dfield_copy_data(
+/*=============*/
+	dfield_t*	field1,	/*!< out: field to copy to */
+	const dfield_t*	field2)	/*!< in: field to copy from */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Copies a data field to another. */
+UNIV_INLINE
+void
+dfield_copy(
+/*========*/
+	dfield_t*	field1,	/*!< out: field to copy to */
+	const dfield_t*	field2)	/*!< in: field to copy from */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Copies the data pointed to by a data field. */
+UNIV_INLINE
+void
+dfield_dup(
+/*=======*/
+	dfield_t*	field,	/*!< in/out: data field */
+	mem_heap_t*	heap)	/*!< in: memory heap where allocated */
+	__attribute__((nonnull));
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+Tests if two data fields are equal.
+If len==0, tests the data length and content for equality.
+If len>0, tests the first len bytes of the content for equality.
+@return	TRUE if both fields are NULL or if they are equal */
+UNIV_INLINE
+ibool
+dfield_datas_are_binary_equal(
+/*==========================*/
+	const dfield_t*	field1,	/*!< in: field */
+	const dfield_t*	field2,	/*!< in: field */
+	ulint		len)	/*!< in: maximum prefix to compare,
+				or 0 to compare the whole field length */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Tests if dfield data length and content is equal to the given.
+@return	TRUE if equal */
+UNIV_INLINE
+ibool
+dfield_data_is_binary_equal(
+/*========================*/
+	const dfield_t*	field,	/*!< in: field */
+	ulint		len,	/*!< in: data length or UNIV_SQL_NULL */
+	const byte*	data)	/*!< in: data */
+	__attribute__((nonnull, warn_unused_result));
+#endif /* !UNIV_HOTBACKUP */
+/*********************************************************************//**
+Gets number of fields in a data tuple.
+@return	number of fields */
+UNIV_INLINE
+ulint
+dtuple_get_n_fields(
+/*================*/
+	const dtuple_t*	tuple)	/*!< in: tuple */
+	__attribute__((nonnull, warn_unused_result));
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Gets nth field of a tuple.
+@return	nth field */
+UNIV_INLINE
+dfield_t*
+dtuple_get_nth_field(
+/*=================*/
+	const dtuple_t*	tuple,	/*!< in: tuple */
+	ulint		n);	/*!< in: index of field */
+#else /* UNIV_DEBUG */
+# define dtuple_get_nth_field(tuple, n) ((tuple)->fields + (n))
+#endif /* UNIV_DEBUG */
+/*********************************************************************//**
+Gets info bits in a data tuple.
+@return	info bits */
+UNIV_INLINE
+ulint
+dtuple_get_info_bits(
+/*=================*/
+	const dtuple_t*	tuple)	/*!< in: tuple */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Sets info bits in a data tuple. */
+UNIV_INLINE
+void
+dtuple_set_info_bits(
+/*=================*/
+	dtuple_t*	tuple,		/*!< in: tuple */
+	ulint		info_bits)	/*!< in: info bits */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Gets number of fields used in record comparisons.
+@return	number of fields used in comparisons in rem0cmp.* */
+UNIV_INLINE
+ulint
+dtuple_get_n_fields_cmp(
+/*====================*/
+	const dtuple_t*	tuple)	/*!< in: tuple */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Sets number of fields used in record comparisons. */
+UNIV_INLINE
+void
+dtuple_set_n_fields_cmp(
+/*====================*/
+	dtuple_t*	tuple,		/*!< in: tuple */
+	ulint		n_fields_cmp)	/*!< in: number of fields used in
+					comparisons in rem0cmp.* */
+	__attribute__((nonnull));
+
+/* Estimate the number of bytes that are going to be allocated when
+creating a new dtuple_t object */
+#define DTUPLE_EST_ALLOC(n_fields)	\
+	(sizeof(dtuple_t) + (n_fields) * sizeof(dfield_t))
+
+/**********************************************************//**
+Creates a data tuple from an already allocated chunk of memory.
+The size of the chunk must be at least DTUPLE_EST_ALLOC(n_fields).
+The default value for number of fields used in record comparisons
+for this tuple is n_fields.
+@return	created tuple (inside buf) */
+UNIV_INLINE
+dtuple_t*
+dtuple_create_from_mem(
+/*===================*/
+	void*	buf,		/*!< in, out: buffer to use */
+	ulint	buf_size,	/*!< in: buffer size */
+	ulint	n_fields)	/*!< in: number of fields */
+	__attribute__((nonnull, warn_unused_result));
+
+/**********************************************************//**
+Creates a data tuple to a memory heap. The default value for number
+of fields used in record comparisons for this tuple is n_fields.
+@return	own: created tuple */
+UNIV_INLINE
+dtuple_t*
+dtuple_create(
+/*==========*/
+	mem_heap_t*	heap,	/*!< in: memory heap where the tuple
+				is created, DTUPLE_EST_ALLOC(n_fields)
+				bytes will be allocated from this heap */
+	ulint		n_fields)/*!< in: number of fields */
+	__attribute__((nonnull, malloc));
+
+/*********************************************************************//**
+Sets number of fields used in a tuple. Normally this is set in
+dtuple_create, but if you want later to set it smaller, you can use this. */
+UNIV_INTERN
+void
+dtuple_set_n_fields(
+/*================*/
+	dtuple_t*	tuple,		/*!< in: tuple */
+	ulint		n_fields)	/*!< in: number of fields */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Copies a data tuple to another.  This is a shallow copy; if a deep copy
+is desired, dfield_dup() will have to be invoked on each field.
+@return	own: copy of tuple */
+UNIV_INLINE
+dtuple_t*
+dtuple_copy(
+/*========*/
+	const dtuple_t*	tuple,	/*!< in: tuple to copy from */
+	mem_heap_t*	heap)	/*!< in: memory heap
+				where the tuple is created */
+	__attribute__((nonnull, malloc));
+/**********************************************************//**
+The following function returns the sum of data lengths of a tuple. The space
+occupied by the field structs or the tuple struct is not counted.
+@return	sum of data lengths */
+UNIV_INLINE
+ulint
+dtuple_get_data_size(
+/*=================*/
+	const dtuple_t*	tuple,	/*!< in: typed data tuple */
+	ulint		comp)	/*!< in: nonzero=ROW_FORMAT=COMPACT  */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Computes the number of externally stored fields in a data tuple.
+@return	number of externally stored fields */
+UNIV_INLINE
+ulint
+dtuple_get_n_ext(
+/*=============*/
+	const dtuple_t*	tuple)	/*!< in: tuple */
+	__attribute__((nonnull));
+/************************************************************//**
+Compare two data tuples, respecting the collation of character fields.
+@return 1, 0, -1 if tuple1 is greater, equal, less, respectively,
+than tuple2 */
+UNIV_INTERN
+int
+dtuple_coll_cmp(
+/*============*/
+	const dtuple_t*	tuple1,	/*!< in: tuple 1 */
+	const dtuple_t*	tuple2)	/*!< in: tuple 2 */
+	__attribute__((nonnull, warn_unused_result));
+/************************************************************//**
+Folds a prefix given as the number of fields of a tuple.
+@return	the folded value */
+UNIV_INLINE
+ulint
+dtuple_fold(
+/*========*/
+	const dtuple_t*	tuple,	/*!< in: the tuple */
+	ulint		n_fields,/*!< in: number of complete fields to fold */
+	ulint		n_bytes,/*!< in: number of bytes to fold in an
+				incomplete last field */
+	index_id_t	tree_id)/*!< in: index tree id */
+	__attribute__((nonnull, pure, warn_unused_result));
+/*******************************************************************//**
+Sets types of fields binary in a tuple. */
+UNIV_INLINE
+void
+dtuple_set_types_binary(
+/*====================*/
+	dtuple_t*	tuple,	/*!< in: data tuple */
+	ulint		n)	/*!< in: number of fields to set */
+	__attribute__((nonnull));
+/**********************************************************************//**
+Checks if a dtuple contains an SQL null value.
+@return	TRUE if some field is SQL null */
+UNIV_INLINE
+ibool
+dtuple_contains_null(
+/*=================*/
+	const dtuple_t*	tuple)	/*!< in: dtuple */
+	__attribute__((nonnull, warn_unused_result));
+/**********************************************************//**
+Checks that a data field is typed. Asserts an error if not.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+dfield_check_typed(
+/*===============*/
+	const dfield_t*	field)	/*!< in: data field */
+	__attribute__((nonnull, warn_unused_result));
+/**********************************************************//**
+Checks that a data tuple is typed. Asserts an error if not.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+dtuple_check_typed(
+/*===============*/
+	const dtuple_t*	tuple)	/*!< in: tuple */
+	__attribute__((nonnull, warn_unused_result));
+/**********************************************************//**
+Checks that a data tuple is typed.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+dtuple_check_typed_no_assert(
+/*=========================*/
+	const dtuple_t*	tuple)	/*!< in: tuple */
+	__attribute__((nonnull, warn_unused_result));
+#ifdef UNIV_DEBUG
+/**********************************************************//**
+Validates the consistency of a tuple which must be complete, i.e.,
+all fields must have been set.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+dtuple_validate(
+/*============*/
+	const dtuple_t*	tuple)	/*!< in: tuple */
+	__attribute__((nonnull, warn_unused_result));
+#endif /* UNIV_DEBUG */
+/*************************************************************//**
+Pretty prints a dfield value according to its data type. */
+UNIV_INTERN
+void
+dfield_print(
+/*=========*/
+	const dfield_t*	dfield)	/*!< in: dfield */
+	__attribute__((nonnull));
+/*************************************************************//**
+Pretty prints a dfield value according to its data type. Also the hex string
+is printed if a string contains non-printable characters. */
+UNIV_INTERN
+void
+dfield_print_also_hex(
+/*==================*/
+	const dfield_t*	dfield)	 /*!< in: dfield */
+	__attribute__((nonnull));
+/**********************************************************//**
+The following function prints the contents of a tuple. */
+UNIV_INTERN
+void
+dtuple_print(
+/*=========*/
+	FILE*		f,	/*!< in: output stream */
+	const dtuple_t*	tuple)	/*!< in: tuple */
+	__attribute__((nonnull));
+/**************************************************************//**
+Moves parts of long fields in entry to the big record vector so that
+the size of tuple drops below the maximum record size allowed in the
+database. Moves data only from those fields which are not necessary
+to determine uniquely the insertion place of the tuple in the index.
+@return own: created big record vector, NULL if we are not able to
+shorten the entry enough, i.e., if there are too many fixed-length or
+short fields in entry or the index is clustered */
+UNIV_INTERN
+big_rec_t*
+dtuple_convert_big_rec(
+/*===================*/
+	dict_index_t*	index,	/*!< in: index */
+	dtuple_t*	entry,	/*!< in/out: index entry */
+	ulint*		n_ext)	/*!< in/out: number of
+				externally stored columns */
+	__attribute__((nonnull, malloc, warn_unused_result));
+/**************************************************************//**
+Puts back to entry the data stored in vector. Note that to ensure the
+fields in entry can accommodate the data, vector must have been created
+from entry with dtuple_convert_big_rec. */
+UNIV_INTERN
+void
+dtuple_convert_back_big_rec(
+/*========================*/
+	dict_index_t*	index,	/*!< in: index */
+	dtuple_t*	entry,	/*!< in: entry whose data was put to vector */
+	big_rec_t*	vector)	/*!< in, own: big rec vector; it is
+				freed in this function */
+	__attribute__((nonnull));
+/**************************************************************//**
+Frees the memory in a big rec vector. */
+UNIV_INLINE
+void
+dtuple_big_rec_free(
+/*================*/
+	big_rec_t*	vector)	/*!< in, own: big rec vector; it is
+				freed in this function */
+	__attribute__((nonnull));
+
+/*######################################################################*/
+
+/** An SQL data field: data pointer, length, external-storage flag and type */
+struct dfield_t{
+	void*		data;	/*!< pointer to data */
+	unsigned	ext:1;	/*!< 1=externally stored, 0=stored locally */
+	unsigned	len:32;	/*!< data length; UNIV_SQL_NULL if SQL null */
+	dtype_t		type;	/*!< type of data, stored by value */
+};
+
+/** Structure for an SQL data tuple of fields (logical record) */
+struct dtuple_t {
+	ulint		info_bits;	/*!< info bits of an index record:
+					the default is 0; this field is used
+					if an index record is built from
+					a data tuple */
+	ulint		n_fields;	/*!< number of fields in dtuple */
+	ulint		n_fields_cmp;	/*!< number of fields which should
+					be used in comparison services
+					of rem0cmp.*; the index search
+					is performed by comparing only these
+					fields, others are ignored; the
+					default value in dtuple creation is
+					the same value as n_fields */
+	dfield_t*	fields;		/*!< array of n_fields fields */
+	UT_LIST_NODE_T(dtuple_t) tuple_list;
+					/*!< data tuples can be linked into a
+					list using this field */
+#ifdef UNIV_DEBUG
+	ulint		magic_n;	/*!< magic number DATA_TUPLE_MAGIC_N,
+					checked in debug assertions */
+/** Value of dtuple_t::magic_n */
+# define		DATA_TUPLE_MAGIC_N	65478679
+#endif /* UNIV_DEBUG */
+};
+
+/** A slot for a field in a big rec vector (element of big_rec_t::fields) */
+struct big_rec_field_t {
+	ulint		field_no;	/*!< field number in record */
+	ulint		len;		/*!< stored data length, in bytes */
+	const void*	data;		/*!< stored data */
+};
+
+/** Storage format for overflow data in a big record, that is, a
+clustered index record which needs external storage of data fields */
+struct big_rec_t {
+	mem_heap_t*	heap;		/*!< memory heap from which
+					allocated */
+	ulint		n_fields;	/*!< number of stored fields */
+	big_rec_field_t*fields;		/*!< stored field slots */
+};
+
+#ifndef UNIV_NONINL
+#include "data0data.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/data0data.ic b/storage/innobase/include/data0data.ic
new file mode 100644
index 00000000000..6937d55d211
--- /dev/null
+++ b/storage/innobase/include/data0data.ic
@@ -0,0 +1,649 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/data0data.ic
+SQL data field and tuple
+
+Created 5/30/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "mem0mem.h"
+#include "ut0rnd.h"
+
+#ifdef UNIV_DEBUG
+/** Dummy variable to catch access to uninitialized fields.  In the
+debug version, dtuple_create() will make all fields of dtuple_t point
+to data_error. */
+extern byte data_error;
+
+/*********************************************************************//**
+Gets a non-const pointer to the type struct embedded in an SQL data field.
+@return	pointer to the type struct */
+UNIV_INLINE
+dtype_t*
+dfield_get_type(
+/*============*/
+	const dfield_t*	field)	/*!< in: SQL data field */
+{
+	ut_ad(field);
+
+	return((dtype_t*) &(field->type));	/* const is cast away */
+}
+#endif /* UNIV_DEBUG */
+
+/*********************************************************************//**
+Sets the type of an SQL data field by copying *type into the field. */
+UNIV_INLINE
+void
+dfield_set_type(
+/*============*/
+	dfield_t*	field,	/*!< in/out: SQL data field */
+	const dtype_t*	type)	/*!< in: pointer to data type struct */
+{
+	ut_ad(field && type);
+
+	field->type = *type;
+}
+
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Gets pointer to field data; a non-SQL-NULL field must not point to data_error.
+@return	pointer to data */
+UNIV_INLINE
+void*
+dfield_get_data(
+/*============*/
+	const dfield_t* field)	/*!< in: field */
+{
+	ut_ad(field);
+	ut_ad((field->len == UNIV_SQL_NULL)
+	      || (field->data != &data_error));
+
+	return((void*) field->data);
+}
+#endif /* UNIV_DEBUG */
+
+/*********************************************************************//**
+Gets length of field data; applies the same assertions as dfield_get_data().
+@return	length of data; UNIV_SQL_NULL if SQL null data */
+UNIV_INLINE
+ulint
+dfield_get_len(
+/*===========*/
+	const dfield_t*	field)	/*!< in: field */
+{
+	ut_ad(field);
+	ut_ad((field->len == UNIV_SQL_NULL)
+	      || (field->data != &data_error));
+
+	return(field->len);
+}
+
+/*********************************************************************//**
+Sets length in a field and clears the "external storage" flag. */
+UNIV_INLINE
+void
+dfield_set_len(
+/*===========*/
+	dfield_t*	field,	/*!< in/out: field */
+	ulint		len)	/*!< in: length or UNIV_SQL_NULL */
+{
+	ut_ad(field);
+#ifdef UNIV_VALGRIND_DEBUG
+	if (len != UNIV_SQL_NULL) UNIV_MEM_ASSERT_RW(field->data, len);
+#endif /* UNIV_VALGRIND_DEBUG */
+
+	field->ext = 0;
+	field->len = len;
+}
+
+/*********************************************************************//**
+Determines if a field is SQL NULL, i.e. its length is UNIV_SQL_NULL.
+@return	nonzero if SQL null data */
+UNIV_INLINE
+ulint
+dfield_is_null(
+/*===========*/
+	const dfield_t* field)	/*!< in: field */
+{
+	ut_ad(field);
+
+	return(field->len == UNIV_SQL_NULL);
+}
+
+/*********************************************************************//**
+Determines if a field is externally stored (reads the ext flag).
+@return	nonzero if externally stored */
+UNIV_INLINE
+ulint
+dfield_is_ext(
+/*==========*/
+	const dfield_t* field)	/*!< in: field */
+{
+	ut_ad(field);
+
+	return(field->ext);
+}
+
+/*********************************************************************//**
+Sets the "external storage" flag; data and length are left untouched. */
+UNIV_INLINE
+void
+dfield_set_ext(
+/*===========*/
+	dfield_t*	field)	/*!< in/out: field */
+{
+	ut_ad(field);
+
+	field->ext = 1;
+}
+
+/*********************************************************************//**
+Sets pointer to the data and length in a field; clears the ext flag. */
+UNIV_INLINE
+void
+dfield_set_data(
+/*============*/
+	dfield_t*	field,	/*!< in/out: field */
+	const void*	data,	/*!< in: data */
+	ulint		len)	/*!< in: length or UNIV_SQL_NULL */
+{
+	ut_ad(field);
+
+#ifdef UNIV_VALGRIND_DEBUG
+	if (len != UNIV_SQL_NULL) UNIV_MEM_ASSERT_RW(data, len);
+#endif /* UNIV_VALGRIND_DEBUG */
+	field->data = (void*) data;	/* const is cast away */
+	field->ext = 0;
+	field->len = len;
+}
+
+/*********************************************************************//**
+Sets a data field to SQL NULL: NULL data pointer, length UNIV_SQL_NULL. */
+UNIV_INLINE
+void
+dfield_set_null(
+/*============*/
+	dfield_t*	field)	/*!< in/out: field */
+{
+	dfield_set_data(field, NULL, UNIV_SQL_NULL);
+}
+
+/*********************************************************************//**
+Copies the data pointer, len and ext of a field; the type is not copied. */
+UNIV_INLINE
+void
+dfield_copy_data(
+/*=============*/
+	dfield_t*	field1,	/*!< out: field to copy to */
+	const dfield_t*	field2)	/*!< in: field to copy from */
+{
+	ut_ad(field1 && field2);
+
+	field1->data = field2->data;
+	field1->len = field2->len;
+	field1->ext = field2->ext;
+}
+
+/*********************************************************************//**
+Copies a whole data field, type included; the data pointer is shared. */
+UNIV_INLINE
+void
+dfield_copy(
+/*========*/
+	dfield_t*	field1,	/*!< out: field to copy to */
+	const dfield_t*	field2)	/*!< in: field to copy from */
+{
+	*field1 = *field2;
+}
+
+/*********************************************************************//**
+Copies field data into heap and repoints the field; no-op for SQL NULL. */
+UNIV_INLINE
+void
+dfield_dup(
+/*=======*/
+	dfield_t*	field,	/*!< in/out: data field */
+	mem_heap_t*	heap)	/*!< in: memory heap where allocated */
+{
+	if (!dfield_is_null(field)) {
+		UNIV_MEM_ASSERT_RW(field->data, field->len);
+		field->data = mem_heap_dup(heap, field->data, field->len);
+	}
+}
+
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+Compares the binary contents of two data fields.
+With len==0 both the data lengths and the full contents must match.
+With len>0 at most the first len bytes of each field are considered.
+@return	TRUE if both fields are NULL or if they are equal */
+UNIV_INLINE
+ibool
+dfield_datas_are_binary_equal(
+/*==========================*/
+	const dfield_t*	field1,	/*!< in: field */
+	const dfield_t*	field2,	/*!< in: field */
+	ulint		len)	/*!< in: maximum prefix to compare,
+				or 0 to compare the whole field length */
+{
+	ulint	cmp_len1 = len;
+	ulint	cmp_len2 = len;
+
+	if (len == 0 || field1->len == UNIV_SQL_NULL || field1->len < len) {
+		cmp_len1 = field1->len;
+	}
+	if (len == 0 || field2->len == UNIV_SQL_NULL || field2->len < len) {
+		cmp_len2 = field2->len;
+	}
+
+	return(cmp_len1 == cmp_len2
+	       && (cmp_len1 == UNIV_SQL_NULL
+		   || memcmp(field1->data, field2->data, cmp_len1) == 0));
+}
+
+/*********************************************************************//**
+Checks whether a field holds exactly the given length and bytes.
+@return	TRUE if equal */
+UNIV_INLINE
+ibool
+dfield_data_is_binary_equal(
+/*========================*/
+	const dfield_t*	field,	/*!< in: field */
+	ulint		len,	/*!< in: data length or UNIV_SQL_NULL */
+	const byte*	data)	/*!< in: data */
+{
+	return(dfield_get_len(field) == len
+	       && (len == UNIV_SQL_NULL
+		   || memcmp(dfield_get_data(field), data, len) == 0));
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/*********************************************************************//**
+Gets the info bits of a data tuple (see dtuple_t::info_bits).
+@return	info bits */
+UNIV_INLINE
+ulint
+dtuple_get_info_bits(
+/*=================*/
+	const dtuple_t*	tuple)	/*!< in: tuple */
+{
+	ut_ad(tuple);
+
+	return(tuple->info_bits);
+}
+
+/*********************************************************************//**
+Sets info bits in a data tuple; the value is stored without validation. */
+UNIV_INLINE
+void
+dtuple_set_info_bits(
+/*=================*/
+	dtuple_t*	tuple,		/*!< in/out: tuple */
+	ulint		info_bits)	/*!< in: info bits */
+{
+	ut_ad(tuple);
+
+	tuple->info_bits = info_bits;
+}
+
+/*********************************************************************//**
+Gets number of fields used in record comparisons (dtuple_t::n_fields_cmp).
+@return	number of fields used in comparisons in rem0cmp.* */
+UNIV_INLINE
+ulint
+dtuple_get_n_fields_cmp(
+/*====================*/
+	const dtuple_t*	tuple)	/*!< in: tuple */
+{
+	ut_ad(tuple);
+
+	return(tuple->n_fields_cmp);
+}
+
+/*********************************************************************//**
+Sets number of fields used in record comparisons; must be <= n_fields. */
+UNIV_INLINE
+void
+dtuple_set_n_fields_cmp(
+/*====================*/
+	dtuple_t*	tuple,		/*!< in/out: tuple */
+	ulint		n_fields_cmp)	/*!< in: number of fields used in
+					comparisons in rem0cmp.* */
+{
+	ut_ad(tuple);
+	ut_ad(n_fields_cmp <= tuple->n_fields);
+
+	tuple->n_fields_cmp = n_fields_cmp;
+}
+
+/*********************************************************************//**
+Gets number of fields in a data tuple (dtuple_t::n_fields).
+@return	number of fields */
+UNIV_INLINE
+ulint
+dtuple_get_n_fields(
+/*================*/
+	const dtuple_t*	tuple)	/*!< in: tuple */
+{
+	ut_ad(tuple);
+
+	return(tuple->n_fields);
+}
+
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Gets nth field of a tuple (asserts n < n_fields); the result is non-const.
+@return	nth field */
+UNIV_INLINE
+dfield_t*
+dtuple_get_nth_field(
+/*=================*/
+	const dtuple_t*	tuple,	/*!< in: tuple */
+	ulint		n)	/*!< in: index of field */
+{
+	ut_ad(tuple);
+	ut_ad(n < tuple->n_fields);
+
+	return((dfield_t*) tuple->fields + n);
+}
+#endif /* UNIV_DEBUG */
+
+/**********************************************************//**
+Builds a data tuple inside a caller-supplied chunk of memory: the tuple
+header goes first and the field array follows it immediately. The chunk
+must hold at least DTUPLE_EST_ALLOC(n_fields) bytes. The number of fields
+used in record comparisons defaults to n_fields.
+@return	created tuple (inside buf) */
+UNIV_INLINE
+dtuple_t*
+dtuple_create_from_mem(
+/*===================*/
+	void*	buf,		/*!< in, out: buffer to use */
+	ulint	buf_size,	/*!< in: buffer size */
+	ulint	n_fields)	/*!< in: number of fields */
+{
+	dtuple_t*	tuple = (dtuple_t*) buf;
+
+	ut_ad(buf != NULL);
+	ut_a(buf_size >= DTUPLE_EST_ALLOC(n_fields));
+
+	/* The field array lives directly behind the tuple header. */
+	tuple->fields = (dfield_t*) (tuple + 1);
+	tuple->n_fields = n_fields;
+	tuple->n_fields_cmp = n_fields;
+	tuple->info_bits = 0;
+
+#ifdef UNIV_DEBUG
+	tuple->magic_n = DATA_TUPLE_MAGIC_N;
+
+	{	/* Debug builds set every field to an error value */
+		ulint	pos;
+
+		for (pos = 0; pos < n_fields; pos++) {
+			dfield_t*	field
+				= dtuple_get_nth_field(tuple, pos);
+
+			dfield_set_len(field, UNIV_SQL_NULL);
+			field->data = &data_error;
+			dfield_get_type(field)->mtype = DATA_ERROR;
+		}
+	}
+#endif
+	UNIV_MEM_ASSERT_W(tuple->fields, n_fields * sizeof *tuple->fields);
+	UNIV_MEM_INVALID(tuple->fields, n_fields * sizeof *tuple->fields);
+
+	return(tuple);
+}
+
+/**********************************************************//**
+Allocates a data tuple from a memory heap and initializes it. The number
+of fields used in record comparisons for the new tuple defaults to n_fields.
+@return	own: created tuple */
+UNIV_INLINE
+dtuple_t*
+dtuple_create(
+/*==========*/
+	mem_heap_t*	heap,	/*!< in: memory heap where the tuple
+				is created, DTUPLE_EST_ALLOC(n_fields)
+				bytes will be allocated from this heap */
+	ulint		n_fields) /*!< in: number of fields */
+{
+	const ulint	alloc_size = DTUPLE_EST_ALLOC(n_fields);
+	void*		mem;
+
+	ut_ad(heap);
+
+	/* One heap allocation holds the tuple header followed by its
+	array of fields, which is the layout that
+	dtuple_create_from_mem() sets up. */
+	mem = mem_heap_alloc(heap, alloc_size);
+
+	/* Initialize the tuple in place and hand it to the caller. */
+	return(dtuple_create_from_mem(mem, alloc_size, n_fields));
+}
+
+/*********************************************************************//**
+Shallow-copies the fields of a data tuple; info_bits and n_fields_cmp are not
+copied. For a deep copy, invoke dfield_dup() on each field of the result.
+@return	own: copy of tuple */
+UNIV_INLINE
+dtuple_t*
+dtuple_copy(
+/*========*/
+	const dtuple_t*	tuple,	/*!< in: tuple to copy from */
+	mem_heap_t*	heap)	/*!< in: memory heap
+				where the tuple is created */
+{
+	const ulint	n	= dtuple_get_n_fields(tuple);
+	dtuple_t*	copy	= dtuple_create(heap, n);
+	ulint		pos;
+
+	for (pos = 0; pos < n; pos++) {
+		const dfield_t*	src	= dtuple_get_nth_field(tuple, pos);
+
+		dfield_copy(dtuple_get_nth_field(copy, pos), src);
+	}
+	return(copy);
+}
+
+/**********************************************************//**
+Sums up the data lengths of all fields of a tuple. Neither the field
+structs nor the tuple struct are counted, and the same holds for possible
+space in externally stored parts of a field.
+@return	sum of data lengths */
+UNIV_INLINE
+ulint
+dtuple_get_data_size(
+/*=================*/
+	const dtuple_t*	tuple,	/*!< in: typed data tuple */
+	ulint		comp)	/*!< in: nonzero=ROW_FORMAT=COMPACT  */
+{
+	ulint	total	= 0;
+	ulint	n_fields;
+	ulint	pos;
+
+	ut_ad(tuple);
+	ut_ad(dtuple_check_typed(tuple));
+	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
+
+	n_fields = tuple->n_fields;
+
+	for (pos = 0; pos < n_fields; pos++) {
+		const dfield_t*	field	= dtuple_get_nth_field(tuple, pos);
+		const ulint	len	= dfield_get_len(field);
+
+		/* An SQL NULL field contributes the NULL size that
+		dtype_get_sql_null_size() reports for its type. */
+		if (len != UNIV_SQL_NULL) {
+			total += len;
+		} else {
+			total += dtype_get_sql_null_size(
+				dfield_get_type(field), comp);
+		}
+	}
+
+	return(total);
+}
+
+/*********************************************************************//**
+Computes the number of externally stored fields in a data tuple by summing
+the ext member of every field.
+@return	number of externally stored fields */
+UNIV_INLINE
+ulint
+dtuple_get_n_ext(
+/*=============*/
+	const dtuple_t*	tuple)	/*!< in: tuple */
+{
+	ulint	n_ext	= 0;
+	ulint	n_fields;
+	ulint	i;
+
+	ut_ad(tuple);
+	ut_ad(dtuple_check_typed(tuple));
+	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
+
+	/* Read the field count only after the debug checks above, so that
+	ut_ad(tuple) is evaluated before the tuple is first dereferenced. */
+	n_fields = tuple->n_fields;
+
+	for (i = 0; i < n_fields; i++) {
+		n_ext += dtuple_get_nth_field(tuple, i)->ext;
+	}
+
+	return(n_ext);
+}
+
+/*******************************************************************//**
+Sets the type of the first n fields of a tuple to DATA_BINARY, with
+precise type 0 and length 0. */
+UNIV_INLINE
+void
+dtuple_set_types_binary(
+/*====================*/
+	dtuple_t*	tuple,	/*!< in: data tuple */
+	ulint		n)	/*!< in: number of fields to set */
+{
+	ulint	pos	= 0;
+
+	while (pos < n) {
+		dtype_t*	type;
+
+		type = dfield_get_type(dtuple_get_nth_field(tuple, pos));
+		dtype_set(type, DATA_BINARY, 0, 0);
+
+		pos++;
+	}
+}
+
+/************************************************************//**
+Folds a prefix of a tuple, given as a number of complete fields plus a
+number of bytes of the field that follows them, together with the index
+tree id. SQL NULL fields do not contribute to the fold.
+@return	the folded value */
+UNIV_INLINE
+ulint
+dtuple_fold(
+/*========*/
+	const dtuple_t*	tuple,	/*!< in: the tuple */
+	ulint		n_fields,/*!< in: number of complete fields to fold */
+	ulint		n_bytes,/*!< in: number of bytes to fold in an
+				incomplete last field */
+	index_id_t	tree_id)/*!< in: index tree id */
+{
+	ulint	fold;
+	ulint	pos;
+
+	ut_ad(tuple);
+	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
+	ut_ad(dtuple_check_typed(tuple));
+
+	/* The tree id seeds the fold value. */
+	fold = ut_fold_ull(tree_id);
+
+	/* Fold in the complete fields. */
+	for (pos = 0; pos < n_fields; pos++) {
+		const dfield_t*	f	= dtuple_get_nth_field(tuple, pos);
+		const byte*	ptr	= (const byte*) dfield_get_data(f);
+		ulint		f_len	= dfield_get_len(f);
+
+		if (f_len == UNIV_SQL_NULL) {
+			continue;
+		}
+
+		fold = ut_fold_ulint_pair(fold, ut_fold_binary(ptr, f_len));
+	}
+
+	if (n_bytes == 0) {
+		return(fold);
+	}
+
+	/* Fold in at most n_bytes of the field that follows the complete
+	ones; pos equals n_fields here. */
+	{
+		const dfield_t*	f	= dtuple_get_nth_field(tuple, pos);
+		const byte*	ptr	= (const byte*) dfield_get_data(f);
+		ulint		f_len	= dfield_get_len(f);
+
+		if (f_len != UNIV_SQL_NULL) {
+			ulint	n_fold	= (f_len > n_bytes) ? n_bytes : f_len;
+
+			fold = ut_fold_ulint_pair(
+				fold, ut_fold_binary(ptr, n_fold));
+		}
+	}
+
+	return(fold);
+}
+
+/**********************************************************************//**
+Writes an SQL null field: the first len bytes of the buffer are set
+to zero. */
+UNIV_INLINE
+void
+data_write_sql_null(
+/*================*/
+	byte*	data,	/*!< in: pointer to a buffer of size len */
+	ulint	len)	/*!< in: SQL null size in bytes */
+{
+	(void) memset(data, '\0', len);
+}
+
+/**********************************************************************//**
+Scans the fields of a dtuple for an SQL null value, stopping at the first
+one found.
+@return	TRUE if some field is SQL null */
+UNIV_INLINE
+ibool
+dtuple_contains_null(
+/*=================*/
+	const dtuple_t*	tuple)	/*!< in: dtuple */
+{
+	const ulint	n_fields	= dtuple_get_n_fields(tuple);
+	ulint		pos;
+
+	for (pos = 0; pos != n_fields; ++pos) {
+		if (!dfield_is_null(dtuple_get_nth_field(tuple, pos))) {
+			continue;
+		}
+
+		/* The first SQL NULL field found settles the answer. */
+		return(TRUE);
+	}
+
+	return(FALSE);
+}
+
+/**************************************************************//**
+Frees the memory in a big rec vector by freeing the memory heap that
+vector->heap points to.
+NOTE(review): the vector itself is presumably allocated from that heap
+and must not be used after this call - confirm against the allocator. */
+UNIV_INLINE
+void
+dtuple_big_rec_free(
+/*================*/
+	big_rec_t*	vector)	/*!< in, own: big rec vector; it is
+				freed in this function */
+{
+	mem_heap_free(vector->heap);
+}
diff --git a/storage/innobase/include/data0type.h b/storage/innobase/include/data0type.h
new file mode 100644
index 00000000000..111664b0b52
--- /dev/null
+++ b/storage/innobase/include/data0type.h
@@ -0,0 +1,544 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/data0type.h
+Data types
+
+Created 1/16/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef data0type_h
+#define data0type_h
+
+#include "univ.i"
+
+extern ulint	data_mysql_default_charset_coll;
+#define DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL 8
+#define DATA_MYSQL_BINARY_CHARSET_COLL 63
+
+/* SQL data type struct */
+struct dtype_t;
+
+/* SQL LIKE operator comparison types, named after where the '%' wildcard
+appears in the example patterns below */
+enum ib_like_t {
+	IB_LIKE_EXACT,                  /* no wildcard, e.g., STRING */
+	IB_LIKE_PREFIX,                 /* wildcard last, e.g., STRING% */
+	IB_LIKE_SUFFIX,                 /* wildcard first, e.g., %STRING */
+	IB_LIKE_SUBSTR,                 /* both ends, e.g., %STRING% */
+	IB_LIKE_REGEXP                  /* Future */
+};
+
+/*-------------------------------------------*/
+/* The 'MAIN TYPE' of a column */
+#define DATA_MISSING	0	/* missing column */
+#define	DATA_VARCHAR	1	/* character varying of the
+				latin1_swedish_ci charset-collation; note
+				that the MySQL format for this, DATA_BINARY,
+				DATA_VARMYSQL, is also affected by whether the
+				'precise type' contains
+				DATA_MYSQL_TRUE_VARCHAR */
+#define DATA_CHAR	2	/* fixed length character of the
+				latin1_swedish_ci charset-collation */
+#define DATA_FIXBINARY	3	/* binary string of fixed length */
+#define DATA_BINARY	4	/* binary string */
+#define DATA_BLOB	5	/* binary large object, or a TEXT type;
+				if prtype & DATA_BINARY_TYPE == 0, then this is
+				actually a TEXT column (or a BLOB created
+				with < 4.0.14; since column prefix indexes
+				came only in 4.0.14, the missing flag in BLOBs
+				created before that does not cause any harm) */
+#define	DATA_INT	6	/* integer: can be any size 1 - 8 bytes */
+#define	DATA_SYS_CHILD	7	/* address of the child page in node pointer */
+#define	DATA_SYS	8	/* system column */
+
+/* Data types >= DATA_FLOAT must be compared using the whole field, not as
+binary strings */
+
+#define DATA_FLOAT	9
+#define DATA_DOUBLE	10
+#define DATA_DECIMAL	11	/* decimal number stored as an ASCII string */
+#define	DATA_VARMYSQL	12	/* any charset varying length char */
+#define	DATA_MYSQL	13	/* any charset fixed length char */
+				/* NOTE that 4.1.1 used DATA_MYSQL and
+				DATA_VARMYSQL for all character sets, and the
+				charset-collation for tables created with it
+				can also be latin1_swedish_ci */
+#define DATA_MTYPE_MAX	63	/* dtype_store_for_order_and_null_size()
+				requires the values are <= 63 */
+/*-------------------------------------------*/
+/* The 'PRECISE TYPE' of a column */
+/*
+Tables created by a MySQL user have the following convention:
+
+- In the least significant byte in the precise type we store the MySQL type
+code (not applicable for system columns).
+
+- In the second least significant byte we OR flags DATA_NOT_NULL,
+DATA_UNSIGNED, DATA_BINARY_TYPE.
+
+- In the third least significant byte of the precise type of string types we
+store the MySQL charset-collation code. In DATA_BLOB columns created with
+< 4.0.14 we do not actually know if it is a BLOB or a TEXT column. Since there
+are no indexes on prefixes of BLOB or TEXT columns in < 4.0.14, this is no
+problem, though.
+
+Note that versions < 4.1.2 or < 5.0.1 did not store the charset code to the
+precise type, since the charset was always the default charset of the MySQL
+installation. If the stored charset code is 0 in the system table SYS_COLUMNS
+of InnoDB, that means that the default charset of this MySQL installation
+should be used.
+
+When loading a table definition from the system tables to the InnoDB data
+dictionary cache in main memory, InnoDB versions >= 4.1.2 and >= 5.0.1 check
+if the stored charset-collation is 0, and if that is the case and the type is
+a non-binary string, replace that 0 by the default charset-collation code of
+this MySQL installation. In short, in old tables, the charset-collation code
+in the system tables on disk can be 0, but in in-memory data structures
+(dtype_t), the charset-collation code is always != 0 for non-binary string
+types.
+
+In new tables, in binary string types, the charset-collation code is the
+MySQL code for the 'binary charset', that is, != 0.
+
+For binary string types and for DATA_CHAR, DATA_VARCHAR, and for those
+DATA_BLOB which are binary or have the charset-collation latin1_swedish_ci,
+InnoDB performs all comparisons internally, without resorting to the MySQL
+comparison functions. This is to save CPU time.
+
+InnoDB's own internal system tables have different precise types for their
+columns, and for them the precise type is usually not used at all.
+*/
+
+#define DATA_ENGLISH	4	/* English language character string: this
+				is a relic from pre-MySQL time and only used
+				for InnoDB's own system tables */
+#define DATA_ERROR	111	/* another relic from pre-MySQL time */
+
+#define DATA_MYSQL_TYPE_MASK 255 /* AND with this mask to extract the MySQL
+				 type from the precise type */
+#define DATA_MYSQL_TRUE_VARCHAR 15 /* MySQL type code for the >= 5.0.3
+				   format true VARCHAR */
+
+/* Precise data types for system columns and the length of those columns;
+NOTE: the values must run from 0 up in the order given! All codes must
+be less than 256 */
+#define	DATA_ROW_ID	0	/* row id: a 48-bit integer */
+#define DATA_ROW_ID_LEN	6	/* stored length for row id */
+
+#define DATA_TRX_ID	1	/* transaction id: 6 bytes */
+#define DATA_TRX_ID_LEN	6
+
+#define	DATA_ROLL_PTR	2	/* rollback data pointer: 7 bytes */
+#define DATA_ROLL_PTR_LEN 7
+
+#define	DATA_N_SYS_COLS 3	/* number of system columns defined above */
+
+#define DATA_FTS_DOC_ID	3	/* Used as FTS DOC ID column */
+
+#define DATA_SYS_PRTYPE_MASK 0xF /* mask to extract the above from prtype */
+
+/* Flags ORed to the precise data type */
+#define DATA_NOT_NULL	256	/* this is ORed to the precise type when
+				the column is declared as NOT NULL */
+#define DATA_UNSIGNED	512	/* this is ORed to the precise type when
+				we have an unsigned integer type */
+#define	DATA_BINARY_TYPE 1024	/* if the data type is a binary character
+				string, this is ORed to the precise type:
+				this only holds for tables created with
+				>= MySQL-4.0.14 */
+/* #define	DATA_NONLATIN1	2048 This is a relic from < 4.1.2 and < 5.0.1.
+				In earlier versions this was set for some
+				BLOB columns.
+*/
+#define	DATA_LONG_TRUE_VARCHAR 4096	/* this is ORed to the precise data
+				type when the column is true VARCHAR where
+				MySQL uses 2 bytes to store the data len;
+				for shorter VARCHARs MySQL uses only 1 byte */
+/*-------------------------------------------*/
+
+/* This many bytes we need to store the type information affecting the
+alphabetical order for a single field and decide the storage size of an
+SQL null*/
+#define DATA_ORDER_NULL_TYPE_BUF_SIZE		4
+/* In the >= 4.1.x storage format we add 2 bytes more so that we can also
+store the charset-collation number; one byte is left unused, though */
+#define DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE	6
+
+/* Maximum multi-byte character length in bytes, plus 1. This value is also
+the radix used below to pack mbminlen and mbmaxlen into a single number. */
+#define DATA_MBMAX	5
+
+/* Pack mbminlen, mbmaxlen to mbminmaxlen: mbmaxlen becomes the quotient and
+mbminlen the remainder with respect to DATA_MBMAX. Unpacking with the two
+macros below is exact only when mbminlen < DATA_MBMAX. */
+#define DATA_MBMINMAXLEN(mbminlen, mbmaxlen)	\
+	((mbmaxlen) * DATA_MBMAX + (mbminlen))
+/* Get mbminlen from mbminmaxlen (the remainder modulo DATA_MBMAX). Cast the
+result of UNIV_EXPECT to ulint because in GCC it returns a long. */
+#define DATA_MBMINLEN(mbminmaxlen) ((ulint) \
+                                    UNIV_EXPECT(((mbminmaxlen) % DATA_MBMAX), \
+                                                1))
+/* Get mbmaxlen from mbminmaxlen (the quotient by DATA_MBMAX). */
+#define DATA_MBMAXLEN(mbminmaxlen) ((ulint) ((mbminmaxlen) / DATA_MBMAX))
+
+/* We now support 15 bits (up to 32767) collation number */
+#define MAX_CHAR_COLL_NUM	32767
+
+/* Mask to get the Charset Collation number (0x7fff) */
+#define CHAR_COLL_MASK		MAX_CHAR_COLL_NUM
+
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+Gets the MySQL type code from a dtype.
+@return	MySQL type code; this is NOT an InnoDB type code! */
+UNIV_INLINE
+ulint
+dtype_get_mysql_type(
+/*=================*/
+	const dtype_t*	type);	/*!< in: type struct */
+/*********************************************************************//**
+Determine how many bytes the first n characters of the given string occupy.
+If the string is shorter than n characters, returns the number of bytes
+the characters in the string occupy.
+@return	length of the prefix, in bytes */
+UNIV_INTERN
+ulint
+dtype_get_at_most_n_mbchars(
+/*========================*/
+	ulint		prtype,		/*!< in: precise type */
+	ulint		mbminmaxlen,	/*!< in: minimum and maximum length of
+					a multi-byte character */
+	ulint		prefix_len,	/*!< in: length of the requested
+					prefix, in characters, multiplied by
+					dtype_get_mbmaxlen(dtype) */
+	ulint		data_len,	/*!< in: length of str (in bytes) */
+	const char*	str);		/*!< in: the string whose prefix
+					length is being determined */
+#endif /* !UNIV_HOTBACKUP */
+/*********************************************************************//**
+Checks if a data main type is a string type. Also a BLOB is considered a
+string type.
+@return	TRUE if string type */
+UNIV_INTERN
+ibool
+dtype_is_string_type(
+/*=================*/
+	ulint	mtype);	/*!< in: InnoDB main data type code: DATA_CHAR, ... */
+/*********************************************************************//**
+Checks if a type is a binary string type. Note that for tables created with
+< 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For
+those DATA_BLOB columns this function currently returns FALSE.
+@return	TRUE if binary string type */
+UNIV_INTERN
+ibool
+dtype_is_binary_string_type(
+/*========================*/
+	ulint	mtype,	/*!< in: main data type */
+	ulint	prtype);/*!< in: precise type */
+/*********************************************************************//**
+Checks if a type is a non-binary string type. That is, dtype_is_string_type is
+TRUE and dtype_is_binary_string_type is FALSE. Note that for tables created
+with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column.
+For those DATA_BLOB columns this function currently returns TRUE.
+@return	TRUE if non-binary string type */
+UNIV_INTERN
+ibool
+dtype_is_non_binary_string_type(
+/*============================*/
+	ulint	mtype,	/*!< in: main data type */
+	ulint	prtype);/*!< in: precise type */
+/*********************************************************************//**
+Sets a data type structure. */
+UNIV_INLINE
+void
+dtype_set(
+/*======*/
+	dtype_t*	type,	/*!< in: type struct to init */
+	ulint		mtype,	/*!< in: main data type */
+	ulint		prtype,	/*!< in: precise type */
+	ulint		len);	/*!< in: precision of type */
+/*********************************************************************//**
+Copies a data type structure. */
+UNIV_INLINE
+void
+dtype_copy(
+/*=======*/
+	dtype_t*	type1,	/*!< in: type struct to copy to */
+	const dtype_t*	type2);	/*!< in: type struct to copy from */
+/*********************************************************************//**
+Gets the SQL main data type.
+@return	SQL main data type */
+UNIV_INLINE
+ulint
+dtype_get_mtype(
+/*============*/
+	const dtype_t*	type);	/*!< in: data type */
+/*********************************************************************//**
+Gets the precise data type.
+@return	precise data type */
+UNIV_INLINE
+ulint
+dtype_get_prtype(
+/*=============*/
+	const dtype_t*	type);	/*!< in: data type */
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+Computes the minimum and maximum length, in bytes, of a multi-byte character
+for the given main type and precise type; both are 0 for non-string types. */
+UNIV_INLINE
+void
+dtype_get_mblen(
+/*============*/
+	ulint	mtype,		/*!< in: main type */
+	ulint	prtype,		/*!< in: precise type (and collation) */
+	ulint*	mbminlen,	/*!< out: minimum length of a
+				multi-byte character */
+	ulint*	mbmaxlen);	/*!< out: maximum length of a
+				multi-byte character */
+/*********************************************************************//**
+Gets the MySQL charset-collation code for MySQL string types.
+@return	MySQL charset-collation code */
+UNIV_INLINE
+ulint
+dtype_get_charset_coll(
+/*===================*/
+	ulint	prtype);/*!< in: precise data type */
+/*********************************************************************//**
+Forms a precise type from the < 4.1.2 format precise type plus the
+charset-collation code.
+@return precise type, including the charset-collation code */
+UNIV_INTERN
+ulint
+dtype_form_prtype(
+/*==============*/
+	ulint	old_prtype,	/*!< in: the MySQL type code and the flags
+				DATA_BINARY_TYPE etc. */
+	ulint	charset_coll);	/*!< in: MySQL charset-collation code */
+/*********************************************************************//**
+Determines if a MySQL string type is a subset of UTF-8.  This function
+may return false negatives, in case further character-set collation
+codes are introduced in MySQL later.
+@return	TRUE if a subset of UTF-8 */
+UNIV_INLINE
+ibool
+dtype_is_utf8(
+/*==========*/
+	ulint	prtype);/*!< in: precise data type */
+#endif /* !UNIV_HOTBACKUP */
+/*********************************************************************//**
+Gets the type length.
+@return	fixed length of the type, in bytes, or 0 if variable-length */
+UNIV_INLINE
+ulint
+dtype_get_len(
+/*==========*/
+	const dtype_t*	type);	/*!< in: data type */
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+Gets the minimum length of a character, in bytes.
+@return minimum length of a char, in bytes, or 0 if this is not a
+character type */
+UNIV_INLINE
+ulint
+dtype_get_mbminlen(
+/*===============*/
+	const dtype_t*	type);	/*!< in: type */
+/*********************************************************************//**
+Gets the maximum length of a character, in bytes.
+@return maximum length of a char, in bytes, or 0 if this is not a
+character type */
+UNIV_INLINE
+ulint
+dtype_get_mbmaxlen(
+/*===============*/
+	const dtype_t*	type);	/*!< in: type */
+/*********************************************************************//**
+Sets the minimum and maximum length of a character, in bytes. */
+UNIV_INLINE
+void
+dtype_set_mbminmaxlen(
+/*==================*/
+	dtype_t*	type,		/*!< in/out: type */
+	ulint		mbminlen,	/*!< in: minimum length of a char,
+					in bytes, or 0 if this is not
+					a character type */
+	ulint		mbmaxlen);	/*!< in: maximum length of a char,
+					in bytes, or 0 if this is not
+					a character type */
+/*********************************************************************//**
+Gets the padding character code for the type.
+@return	padding character code, or ULINT_UNDEFINED if no padding specified */
+UNIV_INLINE
+ulint
+dtype_get_pad_char(
+/*===============*/
+	ulint	mtype,		/*!< in: main type */
+	ulint	prtype);	/*!< in: precise type */
+#endif /* !UNIV_HOTBACKUP */
+/***********************************************************************//**
+Returns the size of a fixed size data type, 0 if not a fixed size type.
+@return	fixed size, or 0 */
+UNIV_INLINE
+ulint
+dtype_get_fixed_size_low(
+/*=====================*/
+	ulint	mtype,		/*!< in: main type */
+	ulint	prtype,		/*!< in: precise type */
+	ulint	len,		/*!< in: length */
+	ulint	mbminmaxlen,	/*!< in: minimum and maximum length of a
+				multibyte character, in bytes */
+	ulint	comp);		/*!< in: nonzero=ROW_FORMAT=COMPACT  */
+#ifndef UNIV_HOTBACKUP
+/***********************************************************************//**
+Returns the minimum size of a data type.
+@return	minimum size */
+UNIV_INLINE
+ulint
+dtype_get_min_size_low(
+/*===================*/
+	ulint	mtype,		/*!< in: main type */
+	ulint	prtype,		/*!< in: precise type */
+	ulint	len,		/*!< in: length */
+	ulint	mbminmaxlen);	/*!< in: minimum and maximum length of a
+				multibyte character */
+/***********************************************************************//**
+Returns the maximum size of a data type. Note: types in system tables may be
+incomplete and return incorrect information.
+@return	maximum size */
+UNIV_INLINE
+ulint
+dtype_get_max_size_low(
+/*===================*/
+	ulint	mtype,		/*!< in: main type */
+	ulint	len);		/*!< in: length */
+#endif /* !UNIV_HOTBACKUP */
+/***********************************************************************//**
+Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type.
+For fixed length types it is the fixed length of the type, otherwise 0.
+@return	SQL null storage size in ROW_FORMAT=REDUNDANT */
+UNIV_INLINE
+ulint
+dtype_get_sql_null_size(
+/*====================*/
+	const dtype_t*	type,	/*!< in: type */
+	ulint		comp);	/*!< in: nonzero=ROW_FORMAT=COMPACT  */
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Reads to a type the stored information which determines its alphabetical
+ordering and the storage size of an SQL NULL value. */
+UNIV_INLINE
+void
+dtype_read_for_order_and_null_size(
+/*===============================*/
+	dtype_t*	type,	/*!< in: type struct */
+	const byte*	buf);	/*!< in: buffer for the stored order info */
+/**********************************************************************//**
+Stores for a type the information which determines its alphabetical ordering
+and the storage size of an SQL NULL value. This is the >= 4.1.x storage
+format. */
+UNIV_INLINE
+void
+dtype_new_store_for_order_and_null_size(
+/*====================================*/
+	byte*		buf,	/*!< in: buffer for
+				DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
+				bytes where we store the info */
+	const dtype_t*	type,	/*!< in: type struct */
+	ulint		prefix_len);/*!< in: prefix length to
+				replace type->len, or 0 */
+/**********************************************************************//**
+Reads to a type the stored information which determines its alphabetical
+ordering and the storage size of an SQL NULL value. This is the >= 4.1.x
+storage format. */
+UNIV_INLINE
+void
+dtype_new_read_for_order_and_null_size(
+/*===================================*/
+	dtype_t*	type,	/*!< in: type struct */
+	const byte*	buf);	/*!< in: buffer for stored type order info */
+
+/*********************************************************************//**
+Returns the type's SQL name (e.g. BIGINT UNSIGNED) from mtype,prtype,len
+@return the SQL type name */
+UNIV_INLINE
+char*
+dtype_sql_name(
+/*===========*/
+	unsigned	mtype,	/*!< in: mtype */
+	unsigned	prtype,	/*!< in: prtype */
+	unsigned	len,	/*!< in: len */
+	char*		name,	/*!< out: SQL name */
+	unsigned	name_sz);/*!< in: size of the name buffer */
+
+#endif /* !UNIV_HOTBACKUP */
+
+/*********************************************************************//**
+Validates a data type structure.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+dtype_validate(
+/*===========*/
+	const dtype_t*	type);	/*!< in: type struct to validate */
+/*********************************************************************//**
+Prints a data type structure. */
+UNIV_INTERN
+void
+dtype_print(
+/*========*/
+	const dtype_t*	type);	/*!< in: type */
+
+/* Structure for an SQL data type.
+If you add fields to this structure, be sure to initialize them everywhere.
+This structure is initialized in the following functions:
+dtype_set()
+dtype_read_for_order_and_null_size()
+dtype_new_read_for_order_and_null_size()
+sym_tab_add_null_lit() */
+
+struct dtype_t{
+	unsigned	prtype:32;	/*!< precise type; MySQL data
+					type, charset code, flags to
+					indicate nullability,
+					signedness, whether this is a
+					binary string, whether this is
+					a true VARCHAR where MySQL
+					uses 2 bytes to store the length */
+	unsigned	mtype:8;	/*!< main data type */
+
+	/* the remaining fields do not affect alphabetical ordering: */
+
+	unsigned	len:16;		/*!< length; for MySQL data this
+					is field->pack_length(),
+					except that for a >= 5.0.3
+					type true VARCHAR this is the
+					maximum byte length of the
+					string data (in addition to
+					the string, MySQL uses 1 or 2
+					bytes to store the string length) */
+#ifndef UNIV_HOTBACKUP
+	unsigned	mbminmaxlen:5;	/*!< minimum and maximum length of a
+					character, in bytes;
+					DATA_MBMINMAXLEN(mbminlen,mbmaxlen);
+					mbminlen=DATA_MBMINLEN(mbminmaxlen);
+					mbmaxlen=DATA_MBMAXLEN(mbminmaxlen);
+					5 bits are enough because both
+					lengths are < DATA_MBMAX, so the
+					packed value is at most 24 */
+#endif /* !UNIV_HOTBACKUP */
+};
+
+#ifndef UNIV_NONINL
+#include "data0type.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/data0type.ic b/storage/innobase/include/data0type.ic
new file mode 100644
index 00000000000..d489bef89a8
--- /dev/null
+++ b/storage/innobase/include/data0type.ic
@@ -0,0 +1,711 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/data0type.ic
+Data types
+
+Created 1/16/1996 Heikki Tuuri
+*******************************************************/
+
+#include <string.h> /* strlen() */
+
+#include "mach0data.h"
+#ifndef UNIV_HOTBACKUP
+# include "ha_prototypes.h"
+
+/*********************************************************************//**
+Extracts the MySQL charset-collation code of a MySQL string type from a
+precise type: the code is stored above the 16 least significant bits of
+prtype and is masked with CHAR_COLL_MASK.
+@return	MySQL charset-collation code */
+UNIV_INLINE
+ulint
+dtype_get_charset_coll(
+/*===================*/
+	ulint	prtype)	/*!< in: precise data type */
+{
+	const ulint	upper_bits	= prtype >> 16;
+
+	return(upper_bits & CHAR_COLL_MASK);
+}
+
+/*********************************************************************//**
+Determines if a MySQL string type is a subset of UTF-8, based on a fixed
+list of charset-collation codes. This function may return false negatives,
+in case further character-set collation codes are introduced in MySQL
+later.
+@return	TRUE if a subset of UTF-8 */
+UNIV_INLINE
+ibool
+dtype_is_utf8(
+/*==========*/
+	ulint	prtype)	/*!< in: precise data type */
+{
+	ibool	subset_of_utf8;
+
+	/* These codes have been copied from strings/ctype-extra.c
+	and strings/ctype-utf8.c. */
+	switch (dtype_get_charset_coll(prtype)) {
+	case 11: /* ascii_general_ci */
+	case 33: /* utf8_general_ci */
+	case 65: /* ascii_bin */
+	case 83: /* utf8_bin */
+	case 254: /* utf8_general_cs */
+		subset_of_utf8 = TRUE;
+		break;
+	default:
+		subset_of_utf8 = FALSE;
+		break;
+	}
+
+	return(subset_of_utf8);
+}
+
+/*********************************************************************//**
+Gets the MySQL type code from a dtype: the least significant byte of the
+precise type.
+@return	MySQL type code; this is NOT an InnoDB type code! */
+UNIV_INLINE
+ulint
+dtype_get_mysql_type(
+/*=================*/
+	const dtype_t*	type)	/*!< in: type struct */
+{
+	const ulint	prtype	= type->prtype;
+
+	/* The mask 0xFF has the same value as DATA_MYSQL_TYPE_MASK (255). */
+	return(prtype & 0xFFUL);
+}
+
+/*********************************************************************//**
+Computes the minimum and maximum length, in bytes, of a multi-byte
+character for a main type and precise type. For string types the widths
+come from innobase_get_cset_width() for the charset-collation stored in
+prtype; for all other types both lengths are 0. */
+UNIV_INLINE
+void
+dtype_get_mblen(
+/*============*/
+	ulint	mtype,		/*!< in: main type */
+	ulint	prtype,		/*!< in: precise type (and collation) */
+	ulint*	mbminlen,	/*!< out: minimum length of a
+				multi-byte character */
+	ulint*	mbmaxlen)	/*!< out: maximum length of a
+				multi-byte character */
+{
+	if (!dtype_is_string_type(mtype)) {
+		/* Not a character type: no character widths apply. */
+		*mbmaxlen = 0;
+		*mbminlen = 0;
+		return;
+	}
+
+	innobase_get_cset_width(dtype_get_charset_coll(prtype),
+				mbminlen, mbmaxlen);
+
+	ut_ad(*mbminlen <= *mbmaxlen);
+	ut_ad(*mbminlen < DATA_MBMAX);
+	ut_ad(*mbmaxlen < DATA_MBMAX);
+}
+
+/*********************************************************************//**
+Stores the minimum and maximum length of a character, in bytes, in a type,
+packed into type->mbminmaxlen with DATA_MBMINMAXLEN(). */
+UNIV_INLINE
+void
+dtype_set_mbminmaxlen(
+/*==================*/
+	dtype_t*	type,		/*!< in/out: type */
+	ulint		mbminlen,	/*!< in: minimum length of a char,
+					in bytes, or 0 if this is not
+					a character type */
+	ulint		mbmaxlen)	/*!< in: maximum length of a char,
+					in bytes, or 0 if this is not
+					a character type */
+{
+	ulint	packed;
+
+	ut_ad(mbminlen < DATA_MBMAX);
+	ut_ad(mbmaxlen < DATA_MBMAX);
+	ut_ad(mbminlen <= mbmaxlen);
+
+	packed = DATA_MBMINMAXLEN(mbminlen, mbmaxlen);
+
+	type->mbminmaxlen = packed;
+}
+
+/*********************************************************************//**
+Derives the minimum and maximum character length from type->mtype and
+type->prtype with dtype_get_mblen() and stores them in the type with
+dtype_set_mbminmaxlen(). */
+UNIV_INLINE
+void
+dtype_set_mblen(
+/*============*/
+	dtype_t*	type)	/*!< in/out: type */
+{
+	ulint	min_len;
+	ulint	max_len;
+
+	dtype_get_mblen(type->mtype, type->prtype, &min_len, &max_len);
+
+	dtype_set_mbminmaxlen(type, min_len, max_len);
+
+	ut_ad(dtype_validate(type));
+}
+#else /* !UNIV_HOTBACKUP */
+# define dtype_set_mblen(type) (void) 0
+#endif /* !UNIV_HOTBACKUP */
+
+/*********************************************************************//**
+Initializes a data type structure from a main type, a precise type and
+a length, and then sets its character length information with
+dtype_set_mblen(). */
+UNIV_INLINE
+void
+dtype_set(
+/*======*/
+	dtype_t*	type,	/*!< out: type struct to init */
+	ulint		mtype,	/*!< in: main data type */
+	ulint		prtype,	/*!< in: precise type */
+	ulint		len)	/*!< in: precision of type */
+{
+	ut_ad(type);
+	ut_ad(mtype <= DATA_MTYPE_MAX);
+
+	type->prtype = prtype;
+	type->mtype = mtype;
+	type->len = len;
+
+	/* This must come last: the mtype and prtype stored above are
+	what the character lengths are derived from. */
+	dtype_set_mblen(type);
+}
+
+/*********************************************************************//**
+Copies a data type structure by plain struct assignment; the copy is then
+checked with dtype_validate() inside a ut_ad() assertion. */
+UNIV_INLINE
+void
+dtype_copy(
+/*=======*/
+	dtype_t*	type1,	/*!< out: type struct to copy to */
+	const dtype_t*	type2)	/*!< in: type struct to copy from */
+{
+	*type1 = *type2;
+
+	ut_ad(dtype_validate(type1));
+}
+
+/*********************************************************************//**
+Reads the SQL main data type (the mtype member) of a data type.
+@return	SQL main data type */
+UNIV_INLINE
+ulint
+dtype_get_mtype(
+/*============*/
+	const dtype_t*	type)	/*!< in: data type */
+{
+	ut_ad(type);
+
+	/* mtype is a bit-field; widen it to ulint explicitly. */
+	return((ulint) type->mtype);
+}
+
+/*********************************************************************//**
+Reads the precise data type (the prtype member) of a data type.
+@return	precise data type */
+UNIV_INLINE
+ulint
+dtype_get_prtype(
+/*=============*/
+	const dtype_t*	type)	/*!< in: data type */
+{
+	ut_ad(type);
+
+	/* prtype is a bit-field; widen it to ulint explicitly. */
+	return((ulint) type->prtype);
+}
+
+/*********************************************************************//**
+Reads the type length (the len member) of a data type.
+@return	fixed length of the type, in bytes, or 0 if variable-length */
+UNIV_INLINE
+ulint
+dtype_get_len(
+/*==========*/
+	const dtype_t*	type)	/*!< in: data type */
+{
+	ut_ad(type);
+
+	/* len is a bit-field; widen it to ulint explicitly. */
+	return((ulint) type->len);
+}
+
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+Gets the minimum length of a character, in bytes, from type->mbminmaxlen.
+@return minimum length of a char, in bytes, or 0 if this is not a
+character type */
+UNIV_INLINE
+ulint
+dtype_get_mbminlen(
+/*===============*/
+	const dtype_t*	type)	/*!< in: type (checked with ut_ad) */
+{
+	ut_ad(type);
+	return(DATA_MBMINLEN(type->mbminmaxlen));
+}
+/*********************************************************************//**
+Gets the maximum length of a character, in bytes, from type->mbminmaxlen.
+@return maximum length of a char, in bytes, or 0 if this is not a
+character type */
+UNIV_INLINE
+ulint
+dtype_get_mbmaxlen(
+/*===============*/
+	const dtype_t*	type)	/*!< in: type (checked with ut_ad) */
+{
+	ut_ad(type);
+	return(DATA_MBMAXLEN(type->mbminmaxlen));
+}
+
+/*********************************************************************//**
+Gets the padding character code for a type.
+@return	padding character code, or ULINT_UNDEFINED if no padding specified */
+UNIV_INLINE
+ulint
+dtype_get_pad_char(
+/*===============*/
+	ulint	mtype,		/*!< in: main type */
+	ulint	prtype)		/*!< in: precise type */
+{
+	ulint	pad = ULINT_UNDEFINED;	/* stays so when no padding applies */
+
+	switch (mtype) {
+	case DATA_FIXBINARY:
+	case DATA_BINARY:
+		/* Starting from 5.0.18, VARBINARY and BINARY columns
+		(binary charset-collation) are left unpadded. */
+		if (dtype_get_charset_coll(prtype)
+		    != DATA_MYSQL_BINARY_CHARSET_COLL) {
+			pad = 0x20;
+		}
+		break;
+	case DATA_CHAR:
+	case DATA_VARCHAR:
+	case DATA_MYSQL:
+	case DATA_VARMYSQL:
+		/* Space pads all char and binary strings. */
+		pad = 0x20;
+		break;
+	case DATA_BLOB:
+		/* Starting from 5.0.3, TEXT strings are space-padded too. */
+		if (!(prtype & DATA_BINARY_TYPE)) {
+			pad = 0x20;
+		}
+		break;
+	}
+	return(pad);
+}
+
+/**********************************************************************//**
+Stores for a type the information which determines its alphabetical ordering
+and the storage size of an SQL NULL value. This is the >= 4.1.x storage
+format. */
+UNIV_INLINE
+void
+dtype_new_store_for_order_and_null_size(
+/*====================================*/
+	byte*		buf,	/*!< out: buffer of
+				DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
+				bytes where we store the info */
+	const dtype_t*	type,	/*!< in: type struct */
+	ulint		prefix_len)/*!< in: prefix length to
+				replace type->len, or 0 */
+{
+#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
+#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
+#endif
+	ulint	len;
+
+	ut_ad(type);
+	ut_ad(type->mtype >= DATA_VARCHAR);
+	ut_ad(type->mtype <= DATA_MYSQL);
+
+	buf[0] = (byte)(type->mtype & 0xFFUL);
+
+	if (type->prtype & DATA_BINARY_TYPE) {
+		buf[0] |= 128;	/* top bit of byte 0 flags a binary type */
+	}
+
+	/* In versions < 4.1.2 we had:	if (type->prtype & DATA_NONLATIN1) {
+	buf[0] |= 64;
+	}
+	*/
+
+	buf[1] = (byte)(type->prtype & 0xFFUL);
+
+	len = prefix_len ? prefix_len : type->len;
+
+	mach_write_to_2(buf + 2, len & 0xFFFFUL);
+
+	ut_ad(dtype_get_charset_coll(type->prtype) <= MAX_CHAR_COLL_NUM);
+	mach_write_to_2(buf + 4, dtype_get_charset_coll(type->prtype));
+
+	if (type->prtype & DATA_NOT_NULL) {
+		buf[4] |= 128;	/* top bit of byte 4 flags NOT NULL */
+	}
+}
+
+/**********************************************************************//**
+Reads to a type the stored information which determines its alphabetical
+ordering and the storage size of an SQL NULL value. This is the < 4.1.x
+storage format. */
+UNIV_INLINE
+void
+dtype_read_for_order_and_null_size(
+/*===============================*/
+	dtype_t*	type,	/*!< out: type struct to fill in */
+	const byte*	buf)	/*!< in: buffer for stored type order info */
+{
+#if 4 != DATA_ORDER_NULL_TYPE_BUF_SIZE
+# error "4 != DATA_ORDER_NULL_TYPE_BUF_SIZE"
+#endif
+
+	type->mtype = buf[0] & 63;	/* low 6 bits of byte 0 */
+	type->prtype = buf[1];
+
+	if (buf[0] & 128) {		/* top bit of byte 0 set */
+		type->prtype |= DATA_BINARY_TYPE;
+	}
+
+	type->len = mach_read_from_2(buf + 2);
+
+	type->prtype = dtype_form_prtype(type->prtype,
+					 data_mysql_default_charset_coll);
+	dtype_set_mblen(type);
+}
+
+/**********************************************************************//**
+Reads to a type the stored information which determines its alphabetical
+ordering and the storage size of an SQL NULL value. This is the >= 4.1.x
+storage format. */
+UNIV_INLINE
+void
+dtype_new_read_for_order_and_null_size(
+/*===================================*/
+	dtype_t*	type,	/*!< out: type struct to fill in */
+	const byte*	buf)	/*!< in: buffer for stored type order info */
+{
+	ulint	charset_coll;
+
+#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
+#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
+#endif
+
+	type->mtype = buf[0] & 63;	/* low 6 bits of byte 0 */
+	type->prtype = buf[1];
+
+	if (buf[0] & 128) {		/* top bit of byte 0 set */
+		type->prtype |= DATA_BINARY_TYPE;
+	}
+
+	if (buf[4] & 128) {		/* top bit of byte 4 set */
+		type->prtype |= DATA_NOT_NULL;
+	}
+
+	type->len = mach_read_from_2(buf + 2);
+
+	charset_coll = mach_read_from_2(buf + 4) & CHAR_COLL_MASK;
+
+	if (dtype_is_string_type(type->mtype)) {
+		ut_a(charset_coll <= MAX_CHAR_COLL_NUM);
+
+		if (charset_coll == 0) {
+			/* This insert buffer record was inserted with MySQL
+			version < 4.1.2, and the charset-collation code was not
+			explicitly stored to dtype->prtype at that time. It
+			must be the default charset-collation of this MySQL
+			installation. */
+
+			charset_coll = data_mysql_default_charset_coll;
+		}
+
+		type->prtype = dtype_form_prtype(type->prtype, charset_coll);
+	}
+	dtype_set_mblen(type);
+}
+
+/*********************************************************************//**
+Returns the type's SQL name (e.g. BIGINT UNSIGNED) from mtype,prtype,len
+@return name, holding the SQL type name (starts with "UNKNOWN" if unmapped) */
+UNIV_INLINE
+char*
+dtype_sql_name(
+/*===========*/
+	unsigned	mtype,	/*!< in: mtype */
+	unsigned	prtype,	/*!< in: prtype */
+	unsigned	len,	/*!< in: len */
+	char*		name,	/*!< out: SQL name */
+	unsigned	name_sz)/*!< in: size of the name buffer */
+{
+#define APPEND_UNSIGNED()					\
+	do {							\
+		if (prtype & DATA_UNSIGNED) {			\
+			ut_snprintf(name + strlen(name),	\
+				    name_sz - strlen(name),	\
+				    " UNSIGNED");		\
+		}						\
+	} while (0)
+
+	ut_snprintf(name, name_sz, "UNKNOWN");
+
+	switch (mtype) {
+	case DATA_INT:
+		switch (len) {
+		case 1:
+			ut_snprintf(name, name_sz, "TINYINT");
+			break;
+		case 2:
+			ut_snprintf(name, name_sz, "SMALLINT");
+			break;
+		case 3:
+			ut_snprintf(name, name_sz, "MEDIUMINT");
+			break;
+		case 4:
+			ut_snprintf(name, name_sz, "INT");
+			break;
+		case 8:
+			ut_snprintf(name, name_sz, "BIGINT");
+			break;
+		}
+		APPEND_UNSIGNED();
+		break;
+	case DATA_FLOAT:
+		ut_snprintf(name, name_sz, "FLOAT");
+		APPEND_UNSIGNED();
+		break;
+	case DATA_DOUBLE:
+		ut_snprintf(name, name_sz, "DOUBLE");
+		APPEND_UNSIGNED();
+		break;
+	case DATA_FIXBINARY:
+		ut_snprintf(name, name_sz, "BINARY(%u)", len);
+		break;
+	case DATA_CHAR:
+	case DATA_MYSQL:
+		ut_snprintf(name, name_sz, "CHAR(%u)", len);
+		break;
+	case DATA_VARCHAR:
+	case DATA_VARMYSQL:
+		ut_snprintf(name, name_sz, "VARCHAR(%u)", len);
+		break;
+	case DATA_BINARY:
+		ut_snprintf(name, name_sz, "VARBINARY(%u)", len);
+		break;
+	case DATA_BLOB:
+		switch (len) {
+		case 9:
+			ut_snprintf(name, name_sz, "TINYBLOB");
+			break;
+		case 10:
+			ut_snprintf(name, name_sz, "BLOB");
+			break;
+		case 11:
+			ut_snprintf(name, name_sz, "MEDIUMBLOB");
+			break;
+		case 12:
+			ut_snprintf(name, name_sz, "LONGBLOB");
+			break;
+		}
+	}
+#undef APPEND_UNSIGNED	/* helper is local: keep it out of includers */
+
+	if (prtype & DATA_NOT_NULL) {
+		ut_snprintf(name + strlen(name),
+			    name_sz - strlen(name),
+			    " NOT NULL");
+	}
+
+	return(name);
+}
+
+#endif /* !UNIV_HOTBACKUP */
+
+/***********************************************************************//**
+Returns the size of a fixed size data type, 0 if not a fixed size type.
+@return	fixed size (len), or 0 */
+UNIV_INLINE
+ulint
+dtype_get_fixed_size_low(
+/*=====================*/
+	ulint	mtype,		/*!< in: main type */
+	ulint	prtype,		/*!< in: precise type */
+	ulint	len,		/*!< in: length */
+	ulint	mbminmaxlen,	/*!< in: minimum and maximum length of
+				a multibyte character, in bytes */
+	ulint	comp)		/*!< in: nonzero=ROW_FORMAT=COMPACT  */
+{
+	switch (mtype) {
+	case DATA_SYS:
+#ifdef UNIV_DEBUG
+		switch (prtype & DATA_MYSQL_TYPE_MASK) {
+		case DATA_ROW_ID:
+			ut_ad(len == DATA_ROW_ID_LEN);
+			break;
+		case DATA_TRX_ID:
+			ut_ad(len == DATA_TRX_ID_LEN);
+			break;
+		case DATA_ROLL_PTR:
+			ut_ad(len == DATA_ROLL_PTR_LEN);
+			break;
+		default:
+			ut_ad(0);
+			return(0);
+		}
+#endif /* UNIV_DEBUG */
+	case DATA_CHAR:		/* DATA_SYS falls through to here */
+	case DATA_FIXBINARY:
+	case DATA_INT:
+	case DATA_FLOAT:
+	case DATA_DOUBLE:
+		return(len);
+	case DATA_MYSQL:
+#ifndef UNIV_HOTBACKUP
+		if (prtype & DATA_BINARY_TYPE) {
+			return(len);
+		} else if (!comp) {
+			return(len);
+		} else {
+#ifdef UNIV_DEBUG
+			ulint	i_mbminlen, i_mbmaxlen;
+
+			innobase_get_cset_width(
+				dtype_get_charset_coll(prtype),
+				&i_mbminlen, &i_mbmaxlen);
+
+			ut_ad(DATA_MBMINMAXLEN(i_mbminlen, i_mbmaxlen)
+			      == mbminmaxlen);
+#endif /* UNIV_DEBUG */
+			if (DATA_MBMINLEN(mbminmaxlen)
+			    == DATA_MBMAXLEN(mbminmaxlen)) {
+				return(len);
+			}
+		}
+#else /* !UNIV_HOTBACKUP */
+		return(len);
+#endif /* !UNIV_HOTBACKUP */
+		/* fall through for variable-length charsets */
+	case DATA_VARCHAR:
+	case DATA_BINARY:
+	case DATA_DECIMAL:
+	case DATA_VARMYSQL:
+	case DATA_BLOB:
+		return(0);
+	default:
+		ut_error;
+	}
+
+	return(0);
+}
+
+#ifndef UNIV_HOTBACKUP
+/***********************************************************************//**
+Returns the minimum size of a data type.
+@return	minimum size; 0 for the variable-length main types */
+UNIV_INLINE
+ulint
+dtype_get_min_size_low(
+/*===================*/
+	ulint	mtype,		/*!< in: main type */
+	ulint	prtype,		/*!< in: precise type */
+	ulint	len,		/*!< in: length */
+	ulint	mbminmaxlen)	/*!< in: minimum and maximum length of a
+				multi-byte character */
+{
+	switch (mtype) {
+	case DATA_SYS:
+#ifdef UNIV_DEBUG
+		switch (prtype & DATA_MYSQL_TYPE_MASK) {
+		case DATA_ROW_ID:
+			ut_ad(len == DATA_ROW_ID_LEN);
+			break;
+		case DATA_TRX_ID:
+			ut_ad(len == DATA_TRX_ID_LEN);
+			break;
+		case DATA_ROLL_PTR:
+			ut_ad(len == DATA_ROLL_PTR_LEN);
+			break;
+		default:
+			ut_ad(0);
+			return(0);
+		}
+#endif /* UNIV_DEBUG */
+	case DATA_CHAR:		/* DATA_SYS falls through to here */
+	case DATA_FIXBINARY:
+	case DATA_INT:
+	case DATA_FLOAT:
+	case DATA_DOUBLE:
+		return(len);
+	case DATA_MYSQL:
+		if (prtype & DATA_BINARY_TYPE) {
+			return(len);
+		} else {
+			ulint	mbminlen = DATA_MBMINLEN(mbminmaxlen);
+			ulint	mbmaxlen = DATA_MBMAXLEN(mbminmaxlen);
+
+			if (mbminlen == mbmaxlen) {
+				return(len);
+			}
+
+			/* variable-length charset: scale len by min/max */
+			ut_a(mbminlen > 0);
+			ut_a(mbmaxlen > mbminlen);
+			ut_a(len % mbmaxlen == 0);
+			return(len * mbminlen / mbmaxlen);
+		}
+	case DATA_VARCHAR:
+	case DATA_BINARY:
+	case DATA_DECIMAL:
+	case DATA_VARMYSQL:
+	case DATA_BLOB:
+		return(0);
+	default:
+		ut_error;
+	}
+
+	return(0);
+}
+
+/***********************************************************************//**
+Returns the maximum size of a data type. Note: types in system tables may be
+incomplete and return incorrect information.
+@return	maximum size */
+UNIV_INLINE
+ulint
+dtype_get_max_size_low(
+/*===================*/
+	ulint	mtype,		/*!< in: main type */
+	ulint	len)		/*!< in: length */
+{
+	if (mtype == DATA_BLOB) {
+		return(ULINT_MAX);	/* the only type not bounded by len */
+	}
+	switch (mtype) {
+	case DATA_VARMYSQL:
+	case DATA_DECIMAL:
+	case DATA_BINARY:
+	case DATA_VARCHAR:
+	case DATA_MYSQL:
+	case DATA_DOUBLE:
+	case DATA_FLOAT:
+	case DATA_INT:
+	case DATA_FIXBINARY:
+	case DATA_CHAR:
+	case DATA_SYS:
+		return(len);
+	default:
+		ut_error;
+	}
+	return(ULINT_MAX);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/***********************************************************************//**
+Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type.
+For fixed length types it is the fixed length of the type, otherwise 0.
+@return	SQL null storage size in ROW_FORMAT=REDUNDANT */
+UNIV_INLINE
+ulint
+dtype_get_sql_null_size(
+/*====================*/
+	const dtype_t*	type,	/*!< in: type */
+	ulint		comp)	/*!< in: nonzero=ROW_FORMAT=COMPACT  */
+{
+#ifdef UNIV_HOTBACKUP
+	return(dtype_get_fixed_size_low(type->mtype, type->prtype, type->len,
+					0, 0));
+#else /* UNIV_HOTBACKUP */
+	return(dtype_get_fixed_size_low(type->mtype, type->prtype, type->len,
+					type->mbminmaxlen, comp));
+#endif /* UNIV_HOTBACKUP */
+}
diff --git a/storage/innobase/include/data0types.h b/storage/innobase/include/data0types.h
new file mode 100644
index 00000000000..bd2bb577611
--- /dev/null
+++ b/storage/innobase/include/data0types.h
@@ -0,0 +1,36 @@
+/*****************************************************************************
+
+Copyright (c) 2000, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/data0types.h
+Some type definitions
+
+Created 9/21/2000 Heikki Tuuri
+*************************************************************************/
+
+#ifndef data0types_h
+#define data0types_h
+
+/* SQL data field struct (forward declaration only) */
+struct dfield_t;
+
+/* SQL data tuple struct (forward declaration only) */
+struct dtuple_t;
+
+#endif
+
diff --git a/storage/innobase/include/db0err.h b/storage/innobase/include/db0err.h
new file mode 100644
index 00000000000..1e87ce3fdb8
--- /dev/null
+++ b/storage/innobase/include/db0err.h
@@ -0,0 +1,161 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/db0err.h
+Global error codes for the database
+
+Created 5/24/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef db0err_h
+#define db0err_h
+
+
+enum dberr_t {
+	DB_SUCCESS_LOCKED_REC = 9,	/*!< like DB_SUCCESS, but a new
+					explicit record lock was created */
+	DB_SUCCESS = 10,
+
+	/* The following are error codes */
+	DB_ERROR,
+	DB_INTERRUPTED,
+	DB_OUT_OF_MEMORY,
+	DB_OUT_OF_FILE_SPACE,
+	DB_LOCK_WAIT,
+	DB_DEADLOCK,
+	DB_ROLLBACK,
+	DB_DUPLICATE_KEY,
+	DB_QUE_THR_SUSPENDED,
+	DB_MISSING_HISTORY,		/*!< required history data has been
+					deleted due to lack of space in
+					rollback segment */
+	DB_CLUSTER_NOT_FOUND = 30,
+	DB_TABLE_NOT_FOUND,
+	DB_MUST_GET_MORE_FILE_SPACE,	/*!< the database has to be stopped
+					and restarted with more file space */
+	DB_TABLE_IS_BEING_USED,
+	DB_TOO_BIG_RECORD,		/*!< a record in an index would not fit
+					on a compressed page, or it would
+					become bigger than 1/2 free space in
+					an uncompressed page frame */
+	DB_LOCK_WAIT_TIMEOUT,		/*!< lock wait lasted too long */
+	DB_NO_REFERENCED_ROW,		/*!< referenced key value not found
+					for a foreign key in an insert or
+					update of a row */
+	DB_ROW_IS_REFERENCED,		/*!< cannot delete or update a row
+					because it contains a key value
+					which is referenced */
+	DB_CANNOT_ADD_CONSTRAINT,	/*!< adding a foreign key constraint
+					to a table failed */
+	DB_CORRUPTION,			/*!< data structure corruption noticed */
+	DB_CANNOT_DROP_CONSTRAINT,	/*!< dropping a foreign key constraint
+					from a table failed */
+	DB_NO_SAVEPOINT,		/*!< no savepoint exists with the given
+					name */
+	DB_TABLESPACE_EXISTS,		/*!< we cannot create a new single-table
+					tablespace because a file of the same
+					name already exists */
+	DB_TABLESPACE_DELETED,		/*!< tablespace was deleted or is
+					being dropped right now */
+	DB_TABLESPACE_NOT_FOUND,	/*!< Attempt to delete a tablespace
+					instance that was not found in the
+					tablespace hash table */
+	DB_LOCK_TABLE_FULL,		/*!< lock structs have exhausted the
+					buffer pool (for big transactions,
+					InnoDB stores the lock structs in the
+					buffer pool) */
+	DB_FOREIGN_DUPLICATE_KEY,	/*!< foreign key constraints
+					activated by the operation would
+					lead to a duplicate key in some
+					table */
+	DB_TOO_MANY_CONCURRENT_TRXS,	/*!< when InnoDB runs out of the
+					preconfigured undo slots, this can
+					only happen when there are too many
+					concurrent transactions */
+	DB_UNSUPPORTED,			/*!< when InnoDB sees any artefact or
+					a feature that it can't recognize or
+					work with e.g., FT indexes created by
+					a later version of the engine. */
+
+	DB_INVALID_NULL,		/*!< a NOT NULL column was found to
+					be NULL during table rebuild */
+
+	DB_STATS_DO_NOT_EXIST,		/*!< an operation that requires the
+					persistent storage, used for recording
+					table and index statistics, was
+					requested but this storage does not
+					exist itself or the stats for a given
+					table do not exist */
+	DB_FOREIGN_EXCEED_MAX_CASCADE,	/*!< Foreign key constraint related
+					cascading delete/update exceeds
+					maximum allowed depth */
+	DB_CHILD_NO_INDEX,		/*!< the child (foreign) table does
+					not have an index that contains the
+					foreign keys as its prefix columns */
+	DB_PARENT_NO_INDEX,		/*!< the parent table does not
+					have an index that contains the
+					foreign keys as its prefix columns */
+	DB_TOO_BIG_INDEX_COL,		/*!< index column size exceeds
+					maximum limit */
+	DB_INDEX_CORRUPT,		/*!< we have corrupted index */
+	DB_UNDO_RECORD_TOO_BIG,		/*!< the undo log record is too big */
+	DB_READ_ONLY,			/*!< Update operation attempted in
+					a read-only transaction */
+	DB_FTS_INVALID_DOCID,		/*!< FTS Doc ID cannot be zero */
+	DB_TABLE_IN_FK_CHECK,		/*!< table is being used in foreign
+					key check */
+	DB_ONLINE_LOG_TOO_BIG,		/*!< Modification log grew too big
+					during online index creation */
+
+	DB_IO_ERROR,			/*!< Generic IO error */
+	DB_IDENTIFIER_TOO_LONG,		/*!< Identifier name too long */
+	DB_FTS_EXCEED_RESULT_CACHE_LIMIT,	/*!< FTS query memory
+					exceeds result cache limit */
+	DB_TEMP_FILE_WRITE_FAILURE,	/*!< Temp file write failure */
+	DB_FTS_TOO_MANY_WORDS_IN_PHRASE,
+					/*!< Too many words in a phrase */
+	DB_TOO_BIG_FOR_REDO,		/*!< Record length greater than 10%
+					of redo log */
+	/* The following are partial failure codes */
+	DB_FAIL = 1000,
+	DB_OVERFLOW,
+	DB_UNDERFLOW,
+	DB_STRONG_FAIL,
+	DB_ZIP_OVERFLOW,
+	DB_RECORD_NOT_FOUND = 1500,
+	DB_END_OF_INDEX,
+	DB_DICT_CHANGED,		/*!< Some part of table dictionary has
+					changed. Such as index dropped or
+					foreign key dropped */
+
+
+        /* The following are API only error codes. */
+	DB_DATA_MISMATCH = 2000,	/*!< Column update or read failed
+					because the types mismatch */
+
+	DB_SCHEMA_NOT_LOCKED,		/*!< If an API function expects the
+					schema to be locked in exclusive mode
+					and if it's not then that API function
+					will return this error code */
+
+	DB_NOT_FOUND			/*!< Generic error code for "Not found"
+					type of errors */
+};
+
+#endif
diff --git a/storage/innobase/include/dict0boot.h b/storage/innobase/include/dict0boot.h
new file mode 100644
index 00000000000..a994c9d8ff1
--- /dev/null
+++ b/storage/innobase/include/dict0boot.h
@@ -0,0 +1,342 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0boot.h
+Data dictionary creation and booting
+
+Created 4/18/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dict0boot_h
+#define dict0boot_h
+
+#include "univ.i"
+
+#include "mtr0mtr.h"
+#include "mtr0log.h"
+#include "ut0byte.h"
+#include "buf0buf.h"
+#include "fsp0fsp.h"
+#include "dict0dict.h"
+
+typedef	byte	dict_hdr_t;
+
+/**********************************************************************//**
+Gets a pointer to the dictionary header and x-latches its page.
+@return	pointer to the dictionary header, page x-latched */
+UNIV_INTERN
+dict_hdr_t*
+dict_hdr_get(
+/*=========*/
+	mtr_t*	mtr);	/*!< in: mtr */
+/**********************************************************************//**
+Returns a new table, index, or space id. */
+UNIV_INTERN
+void
+dict_hdr_get_new_id(
+/*================*/
+	table_id_t*	table_id,	/*!< out: table id
+					(not assigned if NULL) */
+	index_id_t*	index_id,	/*!< out: index id
+					(not assigned if NULL) */
+	ulint*		space_id);	/*!< out: space id
+					(not assigned if NULL) */
+/**********************************************************************//**
+Writes the current value of the row id counter to the dictionary header file
+page. */
+UNIV_INTERN
+void
+dict_hdr_flush_row_id(void);
+/*=======================*/
+/**********************************************************************//**
+Returns a new row id.
+@return	the new id */
+UNIV_INLINE
+row_id_t
+dict_sys_get_new_row_id(void);
+/*=========================*/
+/**********************************************************************//**
+Reads a row id from a record or other 6-byte stored form.
+@return	row id */
+UNIV_INLINE
+row_id_t
+dict_sys_read_row_id(
+/*=================*/
+	const byte*	field);	/*!< in: record field */
+/**********************************************************************//**
+Writes a row id to a record or other 6-byte stored form. */
+UNIV_INLINE
+void
+dict_sys_write_row_id(
+/*==================*/
+	byte*		field,	/*!< out: record field */
+	row_id_t	row_id);/*!< in: row id */
+/*****************************************************************//**
+Initializes the data dictionary memory structures when the database is
+started. This function is also called when the data dictionary is created.
+@return DB_SUCCESS or error code. */
+UNIV_INTERN
+dberr_t
+dict_boot(void)
+/*===========*/
+	__attribute__((warn_unused_result));
+
+/*****************************************************************//**
+Creates and initializes the data dictionary at the server bootstrap.
+@return DB_SUCCESS or error code. */
+UNIV_INTERN
+dberr_t
+dict_create(void)
+/*=============*/
+	__attribute__((warn_unused_result));
+
+/*********************************************************************//**
+Checks if a table id belongs to a system table.
+@return true if the table id belongs to a system table. */
+UNIV_INLINE
+bool
+dict_is_sys_table(
+/*==============*/
+	table_id_t	id)		/*!< in: table id to check */
+	__attribute__((warn_unused_result));
+
+/* Space id and page no where the dictionary header resides */
+#define	DICT_HDR_SPACE		0	/* the SYSTEM tablespace */
+#define	DICT_HDR_PAGE_NO	FSP_DICT_HDR_PAGE_NO
+
+/* The ids for the basic system tables and their indexes */
+#define DICT_TABLES_ID		1
+#define DICT_COLUMNS_ID		2
+#define DICT_INDEXES_ID		3
+#define DICT_FIELDS_ID		4
+/* The following is a secondary index on SYS_TABLES */
+#define DICT_TABLE_IDS_ID	5
+
+#define	DICT_HDR_FIRST_ID	10	/* the ids for tables etc. start
+					from this number, except for basic
+					system tables and their above defined
+					indexes; ibuf tables and indexes are
+					assigned as the id the number
+					DICT_IBUF_ID_MIN plus the space id */
+
+/* The offset of the dictionary header on the page */
+#define	DICT_HDR		FSEG_PAGE_DATA
+
+/*-------------------------------------------------------------*/
+/* Dictionary header offsets, relative to DICT_HDR */
+#define DICT_HDR_ROW_ID		0	/* The latest assigned row id */
+#define DICT_HDR_TABLE_ID	8	/* The latest assigned table id */
+#define DICT_HDR_INDEX_ID	16	/* The latest assigned index id */
+#define DICT_HDR_MAX_SPACE_ID	24	/* Latest assigned space id, or 0 */
+#define DICT_HDR_MIX_ID_LOW	28	/* Obsolete: DICT_HDR_FIRST_ID */
+#define DICT_HDR_TABLES		32	/* Root of SYS_TABLES clust index */
+#define DICT_HDR_TABLE_IDS	36	/* Root of SYS_TABLE_IDS sec index */
+#define DICT_HDR_COLUMNS	40	/* Root of SYS_COLUMNS clust index */
+#define DICT_HDR_INDEXES	44	/* Root of SYS_INDEXES clust index */
+#define DICT_HDR_FIELDS		48	/* Root of SYS_FIELDS clust index */
+
+#define DICT_HDR_FSEG_HEADER	56	/* Segment header for the tablespace
+					segment into which the dictionary
+					header is created */
+/*-------------------------------------------------------------*/
+
+/* The columns in SYS_TABLES */
+enum dict_col_sys_tables_enum {
+	DICT_COL__SYS_TABLES__NAME		= 0,
+	DICT_COL__SYS_TABLES__ID		= 1,
+	DICT_COL__SYS_TABLES__N_COLS		= 2,
+	DICT_COL__SYS_TABLES__TYPE		= 3,
+	DICT_COL__SYS_TABLES__MIX_ID		= 4,
+	DICT_COL__SYS_TABLES__MIX_LEN		= 5,
+	DICT_COL__SYS_TABLES__CLUSTER_ID	= 6,
+	DICT_COL__SYS_TABLES__SPACE		= 7,
+	DICT_NUM_COLS__SYS_TABLES		= 8
+};
+/* Field numbers in the SYS_TABLES clustered index (incl. system fields) */
+enum dict_fld_sys_tables_enum {
+	DICT_FLD__SYS_TABLES__NAME		= 0,
+	DICT_FLD__SYS_TABLES__DB_TRX_ID		= 1,
+	DICT_FLD__SYS_TABLES__DB_ROLL_PTR	= 2,
+	DICT_FLD__SYS_TABLES__ID		= 3,
+	DICT_FLD__SYS_TABLES__N_COLS		= 4,
+	DICT_FLD__SYS_TABLES__TYPE		= 5,
+	DICT_FLD__SYS_TABLES__MIX_ID		= 6,
+	DICT_FLD__SYS_TABLES__MIX_LEN		= 7,
+	DICT_FLD__SYS_TABLES__CLUSTER_ID	= 8,
+	DICT_FLD__SYS_TABLES__SPACE		= 9,
+	DICT_NUM_FIELDS__SYS_TABLES		= 10
+};
+/* The field numbers in the SYS_TABLE_IDS index */
+enum dict_fld_sys_table_ids_enum {
+	DICT_FLD__SYS_TABLE_IDS__ID		= 0,
+	DICT_FLD__SYS_TABLE_IDS__NAME		= 1,
+	DICT_NUM_FIELDS__SYS_TABLE_IDS		= 2
+};
+/* The columns in SYS_COLUMNS */
+enum dict_col_sys_columns_enum {
+	DICT_COL__SYS_COLUMNS__TABLE_ID		= 0,
+	DICT_COL__SYS_COLUMNS__POS		= 1,
+	DICT_COL__SYS_COLUMNS__NAME		= 2,
+	DICT_COL__SYS_COLUMNS__MTYPE		= 3,
+	DICT_COL__SYS_COLUMNS__PRTYPE		= 4,
+	DICT_COL__SYS_COLUMNS__LEN		= 5,
+	DICT_COL__SYS_COLUMNS__PREC		= 6,
+	DICT_NUM_COLS__SYS_COLUMNS		= 7
+};
+/* Field numbers in the SYS_COLUMNS clustered index (incl. system fields) */
+enum dict_fld_sys_columns_enum {
+	DICT_FLD__SYS_COLUMNS__TABLE_ID		= 0,
+	DICT_FLD__SYS_COLUMNS__POS		= 1,
+	DICT_FLD__SYS_COLUMNS__DB_TRX_ID	= 2,
+	DICT_FLD__SYS_COLUMNS__DB_ROLL_PTR	= 3,
+	DICT_FLD__SYS_COLUMNS__NAME		= 4,
+	DICT_FLD__SYS_COLUMNS__MTYPE		= 5,
+	DICT_FLD__SYS_COLUMNS__PRTYPE		= 6,
+	DICT_FLD__SYS_COLUMNS__LEN		= 7,
+	DICT_FLD__SYS_COLUMNS__PREC		= 8,
+	DICT_NUM_FIELDS__SYS_COLUMNS		= 9
+};
+/* The columns in SYS_INDEXES */
+enum dict_col_sys_indexes_enum {
+	DICT_COL__SYS_INDEXES__TABLE_ID		= 0,
+	DICT_COL__SYS_INDEXES__ID		= 1,
+	DICT_COL__SYS_INDEXES__NAME		= 2,
+	DICT_COL__SYS_INDEXES__N_FIELDS		= 3,
+	DICT_COL__SYS_INDEXES__TYPE		= 4,
+	DICT_COL__SYS_INDEXES__SPACE		= 5,
+	DICT_COL__SYS_INDEXES__PAGE_NO		= 6,
+	DICT_NUM_COLS__SYS_INDEXES		= 7
+};
+/* Field numbers in the SYS_INDEXES clustered index (incl. system fields) */
+enum dict_fld_sys_indexes_enum {
+	DICT_FLD__SYS_INDEXES__TABLE_ID		= 0,
+	DICT_FLD__SYS_INDEXES__ID		= 1,
+	DICT_FLD__SYS_INDEXES__DB_TRX_ID	= 2,
+	DICT_FLD__SYS_INDEXES__DB_ROLL_PTR	= 3,
+	DICT_FLD__SYS_INDEXES__NAME		= 4,
+	DICT_FLD__SYS_INDEXES__N_FIELDS		= 5,
+	DICT_FLD__SYS_INDEXES__TYPE		= 6,
+	DICT_FLD__SYS_INDEXES__SPACE		= 7,
+	DICT_FLD__SYS_INDEXES__PAGE_NO		= 8,
+	DICT_NUM_FIELDS__SYS_INDEXES		= 9
+};
+/* The columns in SYS_FIELDS */
+enum dict_col_sys_fields_enum {
+	DICT_COL__SYS_FIELDS__INDEX_ID		= 0,
+	DICT_COL__SYS_FIELDS__POS		= 1,
+	DICT_COL__SYS_FIELDS__COL_NAME		= 2,
+	DICT_NUM_COLS__SYS_FIELDS		= 3
+};
+/* Field numbers in the SYS_FIELDS clustered index (incl. system fields) */
+enum dict_fld_sys_fields_enum {
+	DICT_FLD__SYS_FIELDS__INDEX_ID		= 0,
+	DICT_FLD__SYS_FIELDS__POS		= 1,
+	DICT_FLD__SYS_FIELDS__DB_TRX_ID		= 2,
+	DICT_FLD__SYS_FIELDS__DB_ROLL_PTR	= 3,
+	DICT_FLD__SYS_FIELDS__COL_NAME		= 4,
+	DICT_NUM_FIELDS__SYS_FIELDS		= 5
+};
+/* The columns in SYS_FOREIGN */
+enum dict_col_sys_foreign_enum {
+	DICT_COL__SYS_FOREIGN__ID		= 0,
+	DICT_COL__SYS_FOREIGN__FOR_NAME		= 1,
+	DICT_COL__SYS_FOREIGN__REF_NAME		= 2,
+	DICT_COL__SYS_FOREIGN__N_COLS		= 3,
+	DICT_NUM_COLS__SYS_FOREIGN		= 4
+};
+/* Field numbers in the SYS_FOREIGN clustered index (incl. system fields) */
+enum dict_fld_sys_foreign_enum {
+	DICT_FLD__SYS_FOREIGN__ID		= 0,
+	DICT_FLD__SYS_FOREIGN__DB_TRX_ID	= 1,
+	DICT_FLD__SYS_FOREIGN__DB_ROLL_PTR	= 2,
+	DICT_FLD__SYS_FOREIGN__FOR_NAME		= 3,
+	DICT_FLD__SYS_FOREIGN__REF_NAME		= 4,
+	DICT_FLD__SYS_FOREIGN__N_COLS		= 5,
+	DICT_NUM_FIELDS__SYS_FOREIGN		= 6
+};
+/* The field numbers in the SYS_FOREIGN_FOR_NAME secondary index */
+enum dict_fld_sys_foreign_for_name_enum {
+	DICT_FLD__SYS_FOREIGN_FOR_NAME__NAME	= 0,
+	DICT_FLD__SYS_FOREIGN_FOR_NAME__ID	= 1,
+	DICT_NUM_FIELDS__SYS_FOREIGN_FOR_NAME	= 2
+};
+/* The columns in SYS_FOREIGN_COLS */
+enum dict_col_sys_foreign_cols_enum {
+	DICT_COL__SYS_FOREIGN_COLS__ID			= 0,
+	DICT_COL__SYS_FOREIGN_COLS__POS			= 1,
+	DICT_COL__SYS_FOREIGN_COLS__FOR_COL_NAME	= 2,
+	DICT_COL__SYS_FOREIGN_COLS__REF_COL_NAME	= 3,
+	DICT_NUM_COLS__SYS_FOREIGN_COLS			= 4
+};
+/* Field numbers in the SYS_FOREIGN_COLS clustered index (incl. system fields) */
+enum dict_fld_sys_foreign_cols_enum {
+	DICT_FLD__SYS_FOREIGN_COLS__ID			= 0,
+	DICT_FLD__SYS_FOREIGN_COLS__POS			= 1,
+	DICT_FLD__SYS_FOREIGN_COLS__DB_TRX_ID		= 2,
+	DICT_FLD__SYS_FOREIGN_COLS__DB_ROLL_PTR		= 3,
+	DICT_FLD__SYS_FOREIGN_COLS__FOR_COL_NAME	= 4,
+	DICT_FLD__SYS_FOREIGN_COLS__REF_COL_NAME	= 5,
+	DICT_NUM_FIELDS__SYS_FOREIGN_COLS		= 6
+};
+/* The columns in SYS_TABLESPACES */
+enum dict_col_sys_tablespaces_enum {
+	DICT_COL__SYS_TABLESPACES__SPACE		= 0,
+	DICT_COL__SYS_TABLESPACES__NAME			= 1,
+	DICT_COL__SYS_TABLESPACES__FLAGS		= 2,
+	DICT_NUM_COLS__SYS_TABLESPACES			= 3
+};
+/* Field numbers in the SYS_TABLESPACES clustered index (incl. system fields) */
+enum dict_fld_sys_tablespaces_enum {
+	DICT_FLD__SYS_TABLESPACES__SPACE		= 0,
+	DICT_FLD__SYS_TABLESPACES__DB_TRX_ID		= 1,
+	DICT_FLD__SYS_TABLESPACES__DB_ROLL_PTR		= 2,
+	DICT_FLD__SYS_TABLESPACES__NAME			= 3,
+	DICT_FLD__SYS_TABLESPACES__FLAGS		= 4,
+	DICT_NUM_FIELDS__SYS_TABLESPACES		= 5
+};
+/* The columns in SYS_DATAFILES */
+enum dict_col_sys_datafiles_enum {
+	DICT_COL__SYS_DATAFILES__SPACE			= 0,
+	DICT_COL__SYS_DATAFILES__PATH			= 1,
+	DICT_NUM_COLS__SYS_DATAFILES			= 2
+};
+/* Field numbers in the SYS_DATAFILES clustered index (incl. system fields) */
+enum dict_fld_sys_datafiles_enum {
+	DICT_FLD__SYS_DATAFILES__SPACE			= 0,
+	DICT_FLD__SYS_DATAFILES__DB_TRX_ID		= 1,
+	DICT_FLD__SYS_DATAFILES__DB_ROLL_PTR		= 2,
+	DICT_FLD__SYS_DATAFILES__PATH			= 3,
+	DICT_NUM_FIELDS__SYS_DATAFILES			= 4
+};
+
+/* A number of the columns above occur in multiple tables.  These are the
+lengths of those fields. */
+#define	DICT_FLD_LEN_SPACE	4
+#define	DICT_FLD_LEN_FLAGS	4
+
+/* When a row id which is zero modulo this number (which must be a power of
+two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is
+updated */
+#define DICT_HDR_ROW_ID_WRITE_MARGIN	256
+
+#ifndef UNIV_NONINL
+#include "dict0boot.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/dict0boot.ic b/storage/innobase/include/dict0boot.ic
new file mode 100644
index 00000000000..2b156a4f672
--- /dev/null
+++ b/storage/innobase/include/dict0boot.ic
@@ -0,0 +1,96 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0boot.ic
+Data dictionary creation and booting
+
+Created 4/18/1996 Heikki Tuuri
+*******************************************************/
+
+/**********************************************************************//**
+Returns a new row id, read and incremented under dict_sys->mutex.
+@return	the new id (the value of dict_sys->row_id before the increment) */
+UNIV_INLINE
+row_id_t
+dict_sys_get_new_row_id(void)
+/*=========================*/
+{
+	row_id_t	id;
+
+	mutex_enter(&(dict_sys->mutex));
+
+	id = dict_sys->row_id;
+
+	if (0 == (id % DICT_HDR_ROW_ID_WRITE_MARGIN)) {
+		/* id is a multiple of the margin: flush before incrementing */
+		dict_hdr_flush_row_id();
+	}
+
+	dict_sys->row_id++;
+
+	mutex_exit(&(dict_sys->mutex));
+
+	return(id);
+}
+
+/**********************************************************************//**
+Reads a row id from a record or other 6-byte stored form.
+@return	row id */
+UNIV_INLINE
+row_id_t
+dict_sys_read_row_id(
+/*=================*/
+	const byte*	field)	/*!< in: 6-byte stored row id */
+{
+#if DATA_ROW_ID_LEN != 6
+# error "DATA_ROW_ID_LEN != 6"
+#endif
+
+	return(mach_read_from_6(field));
+}
+
+/**********************************************************************//**
+Writes a row id to a record or other 6-byte stored form. */
+UNIV_INLINE
+void
+dict_sys_write_row_id(
+/*==================*/
+	byte*		field,	/*!< out: 6-byte field to write to */
+	row_id_t	row_id)	/*!< in: row id */
+{
+#if DATA_ROW_ID_LEN != 6
+# error "DATA_ROW_ID_LEN != 6"
+#endif
+
+	mach_write_to_6(field, row_id);
+}
+
+/*********************************************************************//**
+Check if a table id belongs to a system table.
+@return true if id < DICT_HDR_FIRST_ID, i.e. a system table id */
+UNIV_INLINE
+bool
+dict_is_sys_table(
+/*==============*/
+	table_id_t	id)		/*!< in: table id to check */
+{
+	return(id < DICT_HDR_FIRST_ID);
+}
+
+
diff --git a/storage/innobase/include/dict0crea.h b/storage/innobase/include/dict0crea.h
new file mode 100644
index 00000000000..67eab9058da
--- /dev/null
+++ b/storage/innobase/include/dict0crea.h
@@ -0,0 +1,246 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0crea.h
+Database object creation
+
+Created 1/8/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dict0crea_h
+#define dict0crea_h
+
+#include "univ.i"
+#include "dict0types.h"
+#include "dict0dict.h"
+#include "que0types.h"
+#include "row0types.h"
+#include "mtr0mtr.h"
+
+/*********************************************************************//**
+Creates a table create graph.
+@return	own: table create node */
+UNIV_INTERN
+tab_node_t*
+tab_create_graph_create(
+/*====================*/
+	dict_table_t*	table,	/*!< in: table to create, built as a memory data
+				structure */
+	mem_heap_t*	heap,	/*!< in: heap where created */
+	bool		commit);/*!< in: true if the commit node should be
+				added to the query graph */
+/*********************************************************************//**
+Creates an index create graph.
+@return	own: index create node */
+UNIV_INTERN
+ind_node_t*
+ind_create_graph_create(
+/*====================*/
+	dict_index_t*	index,	/*!< in: index to create, built as a memory data
+				structure */
+	mem_heap_t*	heap,	/*!< in: heap where created */
+	bool		commit);/*!< in: true if the commit node should be
+				added to the query graph */
+/***********************************************************//**
+Creates a table. This is a high-level function used in SQL execution graphs.
+@return	query thread to run next or NULL */
+UNIV_INTERN
+que_thr_t*
+dict_create_table_step(
+/*===================*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/***********************************************************//**
+Creates an index. This is a high-level function used in SQL execution
+graphs.
+@return	query thread to run next or NULL */
+UNIV_INTERN
+que_thr_t*
+dict_create_index_step(
+/*===================*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/*******************************************************************//**
+Truncates the index tree associated with a row in SYS_INDEXES table.
+@return	new root page number, or FIL_NULL on failure */
+UNIV_INTERN
+ulint
+dict_truncate_index_tree(
+/*=====================*/
+	dict_table_t*	table,	/*!< in: the table the index belongs to */
+	ulint		space,	/*!< in: 0=truncate,
+				nonzero=create the index tree in the
+				given tablespace */
+	btr_pcur_t*	pcur,	/*!< in/out: persistent cursor pointing to
+				record in the clustered index of
+				SYS_INDEXES table. The cursor may be
+				repositioned in this call. */
+	mtr_t*		mtr);	/*!< in: mtr having the latch
+				on the record page. The mtr may be
+				committed and restarted in this call. */
+/*******************************************************************//**
+Drops the index tree associated with a row in SYS_INDEXES table. */
+UNIV_INTERN
+void
+dict_drop_index_tree(
+/*=================*/
+	rec_t*	rec,	/*!< in/out: record in the clustered index
+			of SYS_INDEXES table */
+	mtr_t*	mtr);	/*!< in: mtr having the latch on the record page */
+/****************************************************************//**
+Creates the foreign key constraints system tables inside InnoDB
+at server bootstrap or server start if they are not found or are
+not of the right form.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+dict_create_or_check_foreign_constraint_tables(void);
+/*================================================*/
+/********************************************************************//**
+Generate a foreign key constraint name when it was not named by the user.
+A generated name has the format dbname/tablename_ibfk_NUMBER, where NUMBER
+starts from 1 and is local to the table (it was global before MySQL 4.0.18).
+@return	DB_SUCCESS or DB_IDENTIFIER_TOO_LONG */
+UNIV_INLINE
+dberr_t
+dict_create_add_foreign_id(
+/*=======================*/
+	ulint*		id_nr,	/*!< in/out: number to use in id generation;
+				incremented if used */
+	const char*	name,	/*!< in: table name */
+	dict_foreign_t*	foreign)/*!< in/out: foreign key */
+	__attribute__((nonnull));
+
+/** Adds the given set of foreign key objects to the dictionary tables
+in the database. This function does not modify the dictionary cache. The
+caller must ensure that all foreign key objects contain a valid constraint
+name in foreign->id.
+@param[in]	local_fk_set	set of foreign key objects, to be added to
+the dictionary tables
+@param[in]	table		table to which the foreign key objects in
+local_fk_set belong to
+@param[in,out]	trx		transaction
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_add_foreigns_to_dictionary(
+/*===================================*/
+	const dict_foreign_set&	local_fk_set,
+	const dict_table_t*	table,
+	trx_t*			trx)
+	__attribute__((nonnull, warn_unused_result));
+/****************************************************************//**
+Creates the tablespaces and datafiles system tables inside InnoDB
+at server bootstrap or server start if they are not found or are
+not of the right form.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+dict_create_or_check_sys_tablespace(void);
+/*=====================================*/
+/********************************************************************//**
+Add a single tablespace definition to the data dictionary tables in the
+database.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_add_tablespace_to_dictionary(
+/*=====================================*/
+	ulint		space,		/*!< in: tablespace id */
+	const char*	name,		/*!< in: tablespace name */
+	ulint		flags,		/*!< in: tablespace flags */
+	const char*	path,		/*!< in: tablespace path */
+	trx_t*		trx,		/*!< in: transaction */
+	bool		commit);	/*!< in: if true then commit the
+					transaction */
+/********************************************************************//**
+Add a foreign key definition to the data dictionary tables.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_add_foreign_to_dictionary(
+/*==================================*/
+	const char*		name,	/*!< in: table name */
+	const dict_foreign_t*	foreign,/*!< in: foreign key */
+	trx_t*			trx)	/*!< in/out: dictionary transaction */
+	__attribute__((nonnull, warn_unused_result));
+
+/** Table create node structure */
+struct tab_node_t{
+	que_common_t	common;	/*!< node type: QUE_NODE_TABLE_CREATE */
+	dict_table_t*	table;	/*!< table to create, built as a memory data
+				structure with dict_mem_... functions */
+	ins_node_t*	tab_def; /*!< child node which does the insert of
+				the table definition; the row to be inserted
+				is built by the parent node  */
+	ins_node_t*	col_def; /*!< child node which does the inserts of
+				the column definitions; the row to be inserted
+				is built by the parent node  */
+	commit_node_t*	commit_node;
+				/*!< child node which performs a commit after
+				a successful table creation */
+	/*----------------------*/
+	/* Local storage for this graph node */
+	ulint		state;	/*!< node execution state */
+	ulint		col_no;	/*!< next column definition to insert */
+	mem_heap_t*	heap;	/*!< memory heap used as auxiliary storage */
+};
+
+/* Table create node states */
+#define	TABLE_BUILD_TABLE_DEF	1
+#define	TABLE_BUILD_COL_DEF	2
+#define	TABLE_COMMIT_WORK	3
+#define	TABLE_ADD_TO_CACHE	4
+#define	TABLE_COMPLETED		5
+
+/** Index create node structure */
+
+struct ind_node_t{
+	que_common_t	common;	/*!< node type: QUE_NODE_INDEX_CREATE */
+	dict_index_t*	index;	/*!< index to create, built as a memory data
+				structure with dict_mem_... functions */
+	ins_node_t*	ind_def; /*!< child node which does the insert of
+				the index definition; the row to be inserted
+				is built by the parent node  */
+	ins_node_t*	field_def; /*!< child node which does the inserts of
+				the field definitions; the row to be inserted
+				is built by the parent node  */
+	commit_node_t*	commit_node;
+				/*!< child node which performs a commit after
+				a successful index creation */
+	/*----------------------*/
+	/* Local storage for this graph node */
+	ulint		state;	/*!< node execution state */
+	ulint		page_no;/*!< root page number of the index */
+	dict_table_t*	table;	/*!< table which owns the index */
+	dtuple_t*	ind_row;/*!< index definition row built */
+	ulint		field_no;/*!< next field definition to insert */
+	mem_heap_t*	heap;	/*!< memory heap used as auxiliary storage */
+};
+
+/* Index create node states */
+#define	INDEX_BUILD_INDEX_DEF	1
+#define	INDEX_BUILD_FIELD_DEF	2
+#define	INDEX_CREATE_INDEX_TREE	3
+#define	INDEX_COMMIT_WORK	4
+#define	INDEX_ADD_TO_CACHE	5
+
+#ifndef UNIV_NONINL
+#include "dict0crea.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/dict0crea.ic b/storage/innobase/include/dict0crea.ic
new file mode 100644
index 00000000000..2d0d9dcb858
--- /dev/null
+++ b/storage/innobase/include/dict0crea.ic
@@ -0,0 +1,98 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0crea.ic
+Database object creation
+
+Created 1/8/1996 Heikki Tuuri
+*******************************************************/
+
+#include "mem0mem.h"
+
+/*********************************************************************//**
+Checks if a table name contains the string "/#sql" which denotes temporary
+tables in MySQL.
+@return true if temporary table */
+UNIV_INTERN
+bool
+row_is_mysql_tmp_table_name(
+/*========================*/
+	const char*     name) __attribute__((warn_unused_result));
+				/*!< in: table name in the form
+				'database/tablename' */
+
+
+/********************************************************************//**
+Generate a foreign key constraint name when it was not named by the user.
+A generated name has the format dbname/tablename_ibfk_NUMBER, where NUMBER
+starts from 1 and is local to the table (it was global before MySQL 4.0.18).
+@return	DB_SUCCESS or DB_IDENTIFIER_TOO_LONG */
+UNIV_INLINE
+dberr_t
+dict_create_add_foreign_id(
+/*=======================*/
+	ulint*		id_nr,	/*!< in/out: number to use in id generation;
+				incremented if used */
+	const char*	name,	/*!< in: table name */
+	dict_foreign_t*	foreign)/*!< in/out: foreign key */
+{
+	if (foreign->id == NULL) {
+		/* Generate a new constraint id */
+		ulint	namelen	= strlen(name);
+		char*	id	= static_cast<char*>(
+					mem_heap_alloc(foreign->heap,
+						       namelen + 20));
+
+		if (row_is_mysql_tmp_table_name(name)) {
+
+			/* no overflow if number < 1e13 */
+			sprintf(id, "%s_ibfk_%lu", name,
+				(ulong) (*id_nr)++);
+		} else {
+			char	table_name[MAX_TABLE_NAME_LEN + 20] = "";
+			uint	errors = 0;
+
+			/* leave the last (zeroed) byte as the terminator */
+			strncpy(table_name, name, sizeof(table_name) - 1);
+
+			innobase_convert_to_system_charset(
+				strchr(table_name, '/') + 1,
+				strchr(name, '/') + 1,
+				MAX_TABLE_NAME_LEN, &errors);
+
+			if (errors) {
+				strncpy(table_name, name,
+					sizeof(table_name) - 1);
+				table_name[sizeof(table_name) - 1] = '\0';
+			}
+			/* no overflow if number < 1e13 */
+			sprintf(id, "%s_ibfk_%lu", table_name,
+				(ulong) (*id_nr)++);
+
+			if (innobase_check_identifier_length(
+				strchr(id,'/') + 1)) {
+				return(DB_IDENTIFIER_TOO_LONG);
+			}
+		}
+		foreign->id = id;
+	}
+
+	return(DB_SUCCESS);
+}
+
diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
new file mode 100644
index 00000000000..dd61e5becc1
--- /dev/null
+++ b/storage/innobase/include/dict0dict.h
@@ -0,0 +1,1841 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0dict.h
+Data dictionary system
+
+Created 1/8/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dict0dict_h
+#define dict0dict_h
+
+#include "univ.i"
+#include "db0err.h"
+#include "dict0types.h"
+#include "dict0mem.h"
+#include "data0type.h"
+#include "data0data.h"
+#include "mem0mem.h"
+#include "rem0types.h"
+#include "ut0mem.h"
+#include "ut0lst.h"
+#include "hash0hash.h"
+#include "ut0rnd.h"
+#include "ut0byte.h"
+#include "trx0types.h"
+#include "row0types.h"
+
+#ifndef UNIV_HOTBACKUP
+# include "sync0sync.h"
+# include "sync0rw.h"
+/******************************************************************//**
+Makes all characters in a NUL-terminated UTF-8 string lower case. */
+UNIV_INTERN
+void
+dict_casedn_str(
+/*============*/
+	char*	a)	/*!< in/out: string to put in lower case */
+	__attribute__((nonnull));
+/********************************************************************//**
+Get the database name length in a table name.
+@return	database name length */
+UNIV_INTERN
+ulint
+dict_get_db_name_len(
+/*=================*/
+	const char*	name)	/*!< in: table name in the form
+				dbname '/' tablename */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Open a table from its database and table name, this is currently used by
+foreign constraint parser to get the referenced table.
+@return complete table name with database and table name, allocated from
+heap memory passed in */
+UNIV_INTERN
+char*
+dict_get_referenced_table(
+/*======================*/
+	const char*	name,		/*!< in: foreign key table name */
+	const char*	database_name,	/*!< in: table db name */
+	ulint		database_name_len,/*!< in: db name length */
+	const char*	table_name,	/*!< in: table name */
+	ulint		table_name_len,	/*!< in: table name length */
+	dict_table_t**	table,		/*!< out: table object or NULL */
+	mem_heap_t*	heap);		/*!< in: heap memory */
+/*********************************************************************//**
+Frees a foreign key struct. */
+
+void
+dict_foreign_free(
+/*==============*/
+	dict_foreign_t*	foreign);	/*!< in, own: foreign key struct */
+/*********************************************************************//**
+Finds the highest [number] for foreign key constraints of the table. Looks
+only at the >= 4.0.18-format id's, which are of the form
+databasename/tablename_ibfk_[number].
+@return highest number, 0 if table has no new format foreign key constraints */
+UNIV_INTERN
+ulint
+dict_table_get_highest_foreign_id(
+/*==============================*/
+	dict_table_t*	table);		/*!< in: table in the dictionary
+					memory cache */
+/********************************************************************//**
+Return the end of table name where we have removed dbname and '/'.
+@return	table name */
+UNIV_INTERN
+const char*
+dict_remove_db_name(
+/*================*/
+	const char*	name)	/*!< in: table name in the form
+				dbname '/' tablename */
+	__attribute__((nonnull, warn_unused_result));
+
+/** Operation to perform when opening a table */
+enum dict_table_op_t {
+	/** Expect the tablespace to exist. */
+	DICT_TABLE_OP_NORMAL = 0,
+	/** Drop any orphan indexes after an aborted online index creation */
+	DICT_TABLE_OP_DROP_ORPHAN,
+	/** Silently load the tablespace if it does not exist,
+	and do not load the definitions of incomplete indexes. */
+	DICT_TABLE_OP_LOAD_TABLESPACE
+};
+
+/**********************************************************************//**
+Returns a table object based on table id.
+@return	table, NULL if does not exist */
+UNIV_INTERN
+dict_table_t*
+dict_table_open_on_id(
+/*==================*/
+	table_id_t	table_id,	/*!< in: table id */
+	ibool		dict_locked,	/*!< in: TRUE=data dictionary locked */
+	dict_table_op_t	table_op)	/*!< in: operation to perform */
+	__attribute__((warn_unused_result));
+/********************************************************************//**
+Decrements the count of open handles to a table. */
+UNIV_INTERN
+void
+dict_table_close(
+/*=============*/
+	dict_table_t*	table,		/*!< in/out: table */
+	ibool		dict_locked,	/*!< in: TRUE=data dictionary locked */
+	ibool		try_drop)	/*!< in: TRUE=try to drop any orphan
+					indexes after an aborted online
+					index creation */
+	__attribute__((nonnull));
+/**********************************************************************//**
+Inits the data dictionary module. */
+UNIV_INTERN
+void
+dict_init(void);
+/*===========*/
+/********************************************************************//**
+Gets the space id of every table of the data dictionary and makes a linear
+list and a hash table of them to the data dictionary cache. This function
+can be called at database startup if we did not need to do a crash recovery.
+In crash recovery we must scan the space id's from the .ibd files in MySQL
+database directories. */
+UNIV_INTERN
+void
+dict_load_space_id_list(void);
+/*=========================*/
+/*********************************************************************//**
+Gets the minimum number of bytes per character.
+@return minimum multi-byte char size, in bytes */
+UNIV_INLINE
+ulint
+dict_col_get_mbminlen(
+/*==================*/
+	const dict_col_t*	col)	/*!< in: column */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Gets the maximum number of bytes per character.
+@return maximum multi-byte char size, in bytes */
+UNIV_INLINE
+ulint
+dict_col_get_mbmaxlen(
+/*==================*/
+	const dict_col_t*	col)	/*!< in: column */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Sets the minimum and maximum number of bytes per character. */
+UNIV_INLINE
+void
+dict_col_set_mbminmaxlen(
+/*=====================*/
+	dict_col_t*	col,		/*!< in/out: column */
+	ulint		mbminlen,	/*!< in: minimum multi-byte
+					character size, in bytes */
+	ulint		mbmaxlen)	/*!< in: maximum multi-byte
+					character size, in bytes */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Copies the column data type to *type. */
+UNIV_INLINE
+void
+dict_col_copy_type(
+/*===============*/
+	const dict_col_t*	col,	/*!< in: column */
+	dtype_t*		type)	/*!< out: data type */
+	__attribute__((nonnull));
+/**********************************************************************//**
+Determine bytes of column prefix to be stored in the undo log. Please
+note if the table format is UNIV_FORMAT_A (< UNIV_FORMAT_B), no prefix
+needs to be stored in the undo log.
+@return bytes of column prefix to be stored in the undo log */
+UNIV_INLINE
+ulint
+dict_max_field_len_store_undo(
+/*==========================*/
+	dict_table_t*		table,	/*!< in: table */
+	const dict_col_t*	col)	/*!< in: column which index prefix
+					is based on */
+	__attribute__((nonnull, warn_unused_result));
+#endif /* !UNIV_HOTBACKUP */
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Assert that a column and a data type match.
+@return	TRUE */
+UNIV_INLINE
+ibool
+dict_col_type_assert_equal(
+/*=======================*/
+	const dict_col_t*	col,	/*!< in: column */
+	const dtype_t*		type)	/*!< in: data type */
+	__attribute__((nonnull, warn_unused_result));
+#endif /* UNIV_DEBUG */
+#ifndef UNIV_HOTBACKUP
+/***********************************************************************//**
+Returns the minimum size of the column.
+@return	minimum size */
+UNIV_INLINE
+ulint
+dict_col_get_min_size(
+/*==================*/
+	const dict_col_t*	col)	/*!< in: column */
+	__attribute__((nonnull, warn_unused_result));
+/***********************************************************************//**
+Returns the maximum size of the column.
+@return	maximum size */
+UNIV_INLINE
+ulint
+dict_col_get_max_size(
+/*==================*/
+	const dict_col_t*	col)	/*!< in: column */
+	__attribute__((nonnull, warn_unused_result));
+/***********************************************************************//**
+Returns the size of a fixed size column, 0 if not a fixed size column.
+@return	fixed size, or 0 */
+UNIV_INLINE
+ulint
+dict_col_get_fixed_size(
+/*====================*/
+	const dict_col_t*	col,	/*!< in: column */
+	ulint			comp)	/*!< in: nonzero=ROW_FORMAT=COMPACT  */
+	__attribute__((nonnull, warn_unused_result));
+/***********************************************************************//**
+Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column.
+For fixed length types it is the fixed length of the type, otherwise 0.
+@return	SQL null storage size in ROW_FORMAT=REDUNDANT */
+UNIV_INLINE
+ulint
+dict_col_get_sql_null_size(
+/*=======================*/
+	const dict_col_t*	col,	/*!< in: column */
+	ulint			comp)	/*!< in: nonzero=ROW_FORMAT=COMPACT  */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Gets the column number.
+@return	col->ind, table column position (starting from 0) */
+UNIV_INLINE
+ulint
+dict_col_get_no(
+/*============*/
+	const dict_col_t*	col)	/*!< in: column */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Gets the column position in the clustered index. */
+UNIV_INLINE
+ulint
+dict_col_get_clust_pos(
+/*===================*/
+	const dict_col_t*	col,		/*!< in: table column */
+	const dict_index_t*	clust_index)	/*!< in: clustered index */
+	__attribute__((nonnull, warn_unused_result));
+/****************************************************************//**
+If the given column name is reserved for InnoDB system columns, return
+TRUE.
+@return	TRUE if name is reserved */
+UNIV_INTERN
+ibool
+dict_col_name_is_reserved(
+/*======================*/
+	const char*	name)	/*!< in: column name */
+	__attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Acquire the autoinc lock. */
+UNIV_INTERN
+void
+dict_table_autoinc_lock(
+/*====================*/
+	dict_table_t*	table)	/*!< in/out: table */
+	__attribute__((nonnull));
+/********************************************************************//**
+Unconditionally set the autoinc counter. */
+UNIV_INTERN
+void
+dict_table_autoinc_initialize(
+/*==========================*/
+	dict_table_t*	table,	/*!< in/out: table */
+	ib_uint64_t	value)	/*!< in: next value to assign to a row */
+	__attribute__((nonnull));
+/********************************************************************//**
+Reads the next autoinc value (== autoinc counter value), 0 if not yet
+initialized.
+@return	value for a new row, or 0 */
+UNIV_INTERN
+ib_uint64_t
+dict_table_autoinc_read(
+/*====================*/
+	const dict_table_t*	table)	/*!< in: table */
+	__attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Updates the autoinc counter if the value supplied is greater than the
+current value. */
+UNIV_INTERN
+void
+dict_table_autoinc_update_if_greater(
+/*=================================*/
+
+	dict_table_t*	table,	/*!< in/out: table */
+	ib_uint64_t	value)	/*!< in: value which was assigned to a row */
+	__attribute__((nonnull));
+/********************************************************************//**
+Release the autoinc lock. */
+UNIV_INTERN
+void
+dict_table_autoinc_unlock(
+/*======================*/
+	dict_table_t*	table)	/*!< in/out: table */
+	__attribute__((nonnull));
+#endif /* !UNIV_HOTBACKUP */
+/**********************************************************************//**
+Adds system columns to a table object. */
+UNIV_INTERN
+void
+dict_table_add_system_columns(
+/*==========================*/
+	dict_table_t*	table,	/*!< in/out: table */
+	mem_heap_t*	heap)	/*!< in: temporary heap */
+	__attribute__((nonnull));
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Adds a table object to the dictionary cache. */
+UNIV_INTERN
+void
+dict_table_add_to_cache(
+/*====================*/
+	dict_table_t*	table,		/*!< in: table */
+	ibool		can_be_evicted,	/*!< in: TRUE if can be evicted*/
+	mem_heap_t*	heap)		/*!< in: temporary heap */
+	__attribute__((nonnull));
+/**********************************************************************//**
+Removes a table object from the dictionary cache. */
+UNIV_INTERN
+void
+dict_table_remove_from_cache(
+/*=========================*/
+	dict_table_t*	table)	/*!< in, own: table */
+	__attribute__((nonnull));
+/**********************************************************************//**
+Renames a table object.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+dict_table_rename_in_cache(
+/*=======================*/
+	dict_table_t*	table,		/*!< in/out: table */
+	const char*	new_name,	/*!< in: new name */
+	ibool		rename_also_foreigns)
+					/*!< in: in ALTER TABLE we want
+					to preserve the original table name
+					in constraints which reference it */
+	__attribute__((nonnull, warn_unused_result));
+/**********************************************************************//**
+Removes an index from the dictionary cache. */
+UNIV_INTERN
+void
+dict_index_remove_from_cache(
+/*=========================*/
+	dict_table_t*	table,	/*!< in/out: table */
+	dict_index_t*	index)	/*!< in, own: index */
+	__attribute__((nonnull));
+/**********************************************************************//**
+Change the id of a table object in the dictionary cache. This is used in
+DISCARD TABLESPACE. */
+UNIV_INTERN
+void
+dict_table_change_id_in_cache(
+/*==========================*/
+	dict_table_t*	table,	/*!< in/out: table object already in cache */
+	table_id_t	new_id)	/*!< in: new id to set */
+	__attribute__((nonnull));
+/**********************************************************************//**
+Removes a foreign constraint struct from the dictionary cache. */
+UNIV_INTERN
+void
+dict_foreign_remove_from_cache(
+/*===========================*/
+	dict_foreign_t*	foreign)	/*!< in, own: foreign constraint */
+	__attribute__((nonnull));
+/**********************************************************************//**
+Adds a foreign key constraint object to the dictionary cache. May free
+the object if there already is an object with the same identifier in
+the cache. At least one of foreign table or referenced table must
+already be in the dictionary cache!
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+dict_foreign_add_to_cache(
+/*======================*/
+	dict_foreign_t*		foreign,
+				/*!< in, own: foreign key constraint */
+	const char**		col_names,
+				/*!< in: column names, or NULL to use
+				foreign->foreign_table->col_names */
+	bool			check_charsets,
+				/*!< in: whether to check charset
+				compatibility */
+	dict_err_ignore_t	ignore_err)
+				/*!< in: error to be ignored */
+	__attribute__((nonnull(1), warn_unused_result));
+/*********************************************************************//**
+Check if the index is referenced by a foreign key. If it is, return the
+matching instance, otherwise NULL.
+@return pointer to foreign key struct if index is defined for foreign
+key, otherwise NULL */
+UNIV_INTERN
+dict_foreign_t*
+dict_table_get_referenced_constraint(
+/*=================================*/
+	dict_table_t*	table,	/*!< in: InnoDB table */
+	dict_index_t*	index)	/*!< in: InnoDB index */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Checks if a table is referenced by foreign keys.
+@return	TRUE if table is referenced by a foreign key */
+UNIV_INTERN
+ibool
+dict_table_is_referenced_by_foreign_key(
+/*====================================*/
+	const dict_table_t*	table)	/*!< in: InnoDB table */
+	__attribute__((nonnull, warn_unused_result));
+/**********************************************************************//**
+Replace the index passed in with another equivalent index in the
+foreign key lists of the table.
+@return whether all replacements were found */
+UNIV_INTERN
+bool
+dict_foreign_replace_index(
+/*=======================*/
+	dict_table_t*		table,  /*!< in/out: table */
+	const char**		col_names,
+					/*!< in: column names, or NULL
+					to use table->col_names */
+	const dict_index_t*	index)	/*!< in: index to be replaced */
+	__attribute__((nonnull(1,3), warn_unused_result));
+/**********************************************************************//**
+Determines whether a string starts with the specified keyword.
+@return TRUE if str starts with keyword */
+UNIV_INTERN
+ibool
+dict_str_starts_with_keyword(
+/*=========================*/
+	THD*		thd,		/*!< in: MySQL thread handle */
+	const char*	str,		/*!< in: string to scan for keyword */
+	const char*	keyword)	/*!< in: keyword to look for */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Checks if an index is defined for a foreign key constraint. An index is
+part of a foreign key constraint if it is referenced by a foreign key
+or if it is a foreign key index.
+@return pointer to foreign key struct if index is defined for foreign
+key, otherwise NULL */
+UNIV_INTERN
+dict_foreign_t*
+dict_table_get_foreign_constraint(
+/*==============================*/
+	dict_table_t*	table,	/*!< in: InnoDB table */
+	dict_index_t*	index)	/*!< in: InnoDB index */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Scans a table create SQL string and adds to the data dictionary
+the foreign key constraints declared in the string. This function
+should be called after the indexes for a table have been created.
+Each foreign key constraint must be accompanied by indexes in
+both participating tables. The indexes are allowed to contain more
+fields than mentioned in the constraint.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_foreign_constraints(
+/*============================*/
+	trx_t*		trx,		/*!< in: transaction */
+	const char*	sql_string,	/*!< in: table create statement where
+					foreign keys are declared like:
+					FOREIGN KEY (a, b) REFERENCES
+					table2(c, d), table2 can be written
+					also with the database
+					name before it: test.table2; the
+					default database is the database of
+					parameter name */
+	size_t		sql_length,	/*!< in: length of sql_string */
+	const char*	name,		/*!< in: table full name in the
+					normalized form
+					database_name/table_name */
+	ibool		reject_fks)	/*!< in: if TRUE, fail with error
+					code DB_CANNOT_ADD_CONSTRAINT if
+					any foreign keys are found. */
+	__attribute__((nonnull, warn_unused_result));
+/**********************************************************************//**
+Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement.
+@return DB_SUCCESS or DB_CANNOT_DROP_CONSTRAINT if syntax error or the
+constraint id does not match */
+UNIV_INTERN
+dberr_t
+dict_foreign_parse_drop_constraints(
+/*================================*/
+	mem_heap_t*	heap,			/*!< in: heap from which we can
+						allocate memory */
+	trx_t*		trx,			/*!< in: transaction */
+	dict_table_t*	table,			/*!< in: table */
+	ulint*		n,			/*!< out: number of constraints
+						to drop */
+	const char***	constraints_to_drop)	/*!< out: id's of the
+						constraints to drop */
+	__attribute__((nonnull, warn_unused_result));
+/**********************************************************************//**
+Returns a table object and increments its open handle count.
+NOTE! This is a high-level function to be used mainly from outside the
+'dict' directory. Inside this directory dict_table_get_low
+is usually the appropriate function.
+@return	table, NULL if does not exist */
+UNIV_INTERN
+dict_table_t*
+dict_table_open_on_name(
+/*====================*/
+	const char*	table_name,	/*!< in: table name */
+	ibool		dict_locked,	/*!< in: TRUE=data dictionary locked */
+	ibool		try_drop,	/*!< in: TRUE=try to drop any orphan
+					indexes after an aborted online
+					index creation */
+	dict_err_ignore_t
+			ignore_err)	/*!< in: error to be ignored when
+					loading the table */
+	__attribute__((nonnull, warn_unused_result));
+
+/*********************************************************************//**
+Tries to find an index whose first fields are the columns in the array,
+in the same order and is not marked for deletion and is not the same
+as types_idx.
+@return	matching index, NULL if not found */
+UNIV_INTERN
+dict_index_t*
+dict_foreign_find_index(
+/*====================*/
+	const dict_table_t*	table,	/*!< in: table */
+	const char**		col_names,
+					/*!< in: column names, or NULL
+					to use table->col_names */
+	const char**		columns,/*!< in: array of column names */
+	ulint			n_cols,	/*!< in: number of columns */
+	const dict_index_t*	types_idx,
+					/*!< in: NULL or an index
+					whose types the column types
+					must match */
+	bool			check_charsets,
+					/*!< in: whether to check
+					charsets.  only has an effect
+					if types_idx != NULL */
+	ulint			check_null)
+					/*!< in: nonzero if none of
+					the columns must be declared
+					NOT NULL */
+	__attribute__((nonnull(1,3), warn_unused_result));
+/**********************************************************************//**
+Returns a column's name.
+@return column name. NOTE: not guaranteed to stay valid if table is
+modified in any way (columns added, etc.). */
+UNIV_INTERN
+const char*
+dict_table_get_col_name(
+/*====================*/
+	const dict_table_t*	table,	/*!< in: table */
+	ulint			col_nr)	/*!< in: column number */
+	__attribute__((nonnull, warn_unused_result));
+/**********************************************************************//**
+Prints table data. */
+UNIV_INTERN
+void
+dict_table_print(
+/*=============*/
+	dict_table_t*	table)	/*!< in: table */
+	__attribute__((nonnull));
+/**********************************************************************//**
+Outputs info on foreign keys of a table. */
+UNIV_INTERN
+void
+dict_print_info_on_foreign_keys(
+/*============================*/
+	ibool		create_table_format, /*!< in: if TRUE then print in
+				a format suitable to be inserted into
+				a CREATE TABLE, otherwise in the format
+				of SHOW TABLE STATUS */
+	FILE*		file,	/*!< in: file where to print */
+	trx_t*		trx,	/*!< in: transaction */
+	dict_table_t*	table)	/*!< in: table */
+	__attribute__((nonnull));
+/**********************************************************************//**
+Outputs info on a foreign key of a table in a format suitable for
+CREATE TABLE. */
+UNIV_INTERN
+void
+dict_print_info_on_foreign_key_in_create_format(
+/*============================================*/
+	FILE*		file,		/*!< in: file where to print */
+	trx_t*		trx,		/*!< in: transaction */
+	dict_foreign_t*	foreign,	/*!< in: foreign key constraint */
+	ibool		add_newline)	/*!< in: whether to add a newline */
+	__attribute__((nonnull(1,3)));
+/********************************************************************//**
+Displays the names of the index and the table. */
+UNIV_INTERN
+void
+dict_index_name_print(
+/*==================*/
+	FILE*			file,	/*!< in: output stream */
+	const trx_t*		trx,	/*!< in: transaction */
+	const dict_index_t*	index)	/*!< in: index to print */
+	__attribute__((nonnull(1,3)));
+/*********************************************************************//**
+Checks whether an index qualifies for use with the given columns, i.e.
+whether its first fields are the columns in the array, in the same order,
+subject to the type, charset and NOT NULL checks requested through
+types_idx, check_charsets and check_null.
+@return	whether the index qualifies */
+UNIV_INTERN
+bool
+dict_foreign_qualify_index(
+/*=======================*/
+	const dict_table_t*	table,	/*!< in: table */
+	const char**		col_names,
+					/*!< in: column names, or NULL
+					to use table->col_names */
+	const char**		columns,/*!< in: array of column names */
+	ulint			n_cols,	/*!< in: number of columns */
+	const dict_index_t*	index,	/*!< in: index to check */
+	const dict_index_t*	types_idx,
+					/*!< in: NULL or an index
+					whose types the column types
+					must match */
+	bool			check_charsets,
+					/*!< in: whether to check
+					charsets.  only has an effect
+					if types_idx != NULL */
+	ulint			check_null)
+					/*!< in: nonzero if none of
+					the columns must be declared
+					NOT NULL */
+	__attribute__((nonnull(1,3), warn_unused_result));
+#ifdef UNIV_DEBUG
+/********************************************************************//**
+Gets the first index on the table (the clustered index).
+@return	index, NULL if none exists */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_first_index(
+/*=======================*/
+	const dict_table_t*	table)	/*!< in: table */
+	__attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Gets the last index on the table.
+@return	index, NULL if none exists */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_last_index(
+/*=======================*/
+	const dict_table_t*	table)	/*!< in: table */
+	__attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Gets the next index on the table.
+@return	index, NULL if none left */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_next_index(
+/*======================*/
+	const dict_index_t*	index)	/*!< in: index */
+	__attribute__((nonnull, warn_unused_result));
+#else /* UNIV_DEBUG */
+# define dict_table_get_first_index(table) UT_LIST_GET_FIRST((table)->indexes)
+# define dict_table_get_last_index(table) UT_LIST_GET_LAST((table)->indexes)
+# define dict_table_get_next_index(index) UT_LIST_GET_NEXT(indexes, index)
+#endif /* UNIV_DEBUG */
+#endif /* !UNIV_HOTBACKUP */
+
+/* Skip corrupted index */
+#define dict_table_skip_corrupt_index(index)			\
+	while (index && dict_index_is_corrupted(index)) {	\
+		index = dict_table_get_next_index(index);	\
+	}
+
+/* Advance index to the next index on the list that is not corrupted
+(NULL when no such index is left). NOTE: index is evaluated more than
+once. */
+#define dict_table_next_uncorrupted_index(index)		\
+do {								\
+	for (index = dict_table_get_next_index(index);		\
+	     index && dict_index_is_corrupted(index);		\
+	     index = dict_table_get_next_index(index)) {	\
+	}							\
+} while (0)
+
+/********************************************************************//**
+Check whether the index is the clustered index.
+@return	nonzero for clustered index, zero for other indexes */
+UNIV_INLINE
+ulint
+dict_index_is_clust(
+/*================*/
+	const dict_index_t*	index)	/*!< in: index */
+	__attribute__((nonnull, pure, warn_unused_result));
+/********************************************************************//**
+Check whether the index is unique.
+@return	nonzero for unique index, zero for other indexes */
+UNIV_INLINE
+ulint
+dict_index_is_unique(
+/*=================*/
+	const dict_index_t*	index)	/*!< in: index */
+	__attribute__((nonnull, pure, warn_unused_result));
+/********************************************************************//**
+Check whether the index is the insert buffer tree.
+@return	nonzero for insert buffer, zero for other indexes */
+UNIV_INLINE
+ulint
+dict_index_is_ibuf(
+/*===============*/
+	const dict_index_t*	index)	/*!< in: index */
+	__attribute__((nonnull, pure, warn_unused_result));
+/********************************************************************//**
+Check whether the index is a secondary index or the insert buffer tree.
+@return	nonzero for secondary index or insert buffer, zero for other indexes */
+UNIV_INLINE
+ulint
+dict_index_is_sec_or_ibuf(
+/*======================*/
+	const dict_index_t*	index)	/*!< in: index */
+	__attribute__((nonnull, pure, warn_unused_result));
+
+/********************************************************************//**
+Gets all the FTS indexes for the table. NOTE: must not be called for
+tables which do not have an FTS-index.
+@return	number of indexes collected */
+UNIV_INTERN
+ulint
+dict_table_get_all_fts_indexes(
+/*===========================*/
+				/* out: number of indexes collected */
+	dict_table_t*	table,	/*!< in: table */
+	ib_vector_t*	indexes)/*!< out: vector for collecting FTS indexes */
+	__attribute__((nonnull));
+/********************************************************************//**
+Gets the number of user-defined columns in a table in the dictionary
+cache.
+@return	number of user-defined (e.g., not ROW_ID) columns of a table */
+UNIV_INLINE
+ulint
+dict_table_get_n_user_cols(
+/*=======================*/
+	const dict_table_t*	table)	/*!< in: table */
+	__attribute__((nonnull, pure, warn_unused_result));
+/********************************************************************//**
+Gets the number of system columns in a table in the dictionary cache.
+@return	number of system (e.g., ROW_ID) columns of a table */
+UNIV_INLINE
+ulint
+dict_table_get_n_sys_cols(
+/*======================*/
+	const dict_table_t*	table)	/*!< in: table */
+	__attribute__((nonnull, pure, warn_unused_result));
+/********************************************************************//**
+Gets the number of all columns (also system) in a table in the dictionary
+cache.
+@return	number of columns of a table */
+UNIV_INLINE
+ulint
+dict_table_get_n_cols(
+/*==================*/
+	const dict_table_t*	table)	/*!< in: table */
+	__attribute__((nonnull, pure, warn_unused_result));
+/********************************************************************//**
+Gets the approximately estimated number of rows in the table.
+@return	estimated number of rows */
+UNIV_INLINE
+ib_uint64_t
+dict_table_get_n_rows(
+/*==================*/
+	const dict_table_t*	table)	/*!< in: table */
+	__attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Increment the number of rows in the table by one.
+Notice that this operation is not protected by any latch, the number is
+approximate. */
+UNIV_INLINE
+void
+dict_table_n_rows_inc(
+/*==================*/
+	dict_table_t*	table)	/*!< in/out: table */
+	__attribute__((nonnull));
+/********************************************************************//**
+Decrement the number of rows in the table by one.
+Notice that this operation is not protected by any latch, the number is
+approximate. */
+UNIV_INLINE
+void
+dict_table_n_rows_dec(
+/*==================*/
+	dict_table_t*	table)	/*!< in/out: table */
+	__attribute__((nonnull));
+#ifdef UNIV_DEBUG
+/********************************************************************//**
+Gets the nth column of a table.
+@return	pointer to column object */
+UNIV_INLINE
+dict_col_t*
+dict_table_get_nth_col(
+/*===================*/
+	const dict_table_t*	table,	/*!< in: table */
+	ulint			pos)	/*!< in: position of column */
+	__attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Gets the given system column of a table.
+@return	pointer to column object */
+UNIV_INLINE
+dict_col_t*
+dict_table_get_sys_col(
+/*===================*/
+	const dict_table_t*	table,	/*!< in: table */
+	ulint			sys)	/*!< in: DATA_ROW_ID, ... */
+	__attribute__((nonnull, warn_unused_result));
+#else /* UNIV_DEBUG */
+#define dict_table_get_nth_col(table, pos) \
+((table)->cols + (pos))
+#define dict_table_get_sys_col(table, sys) \
+((table)->cols + (table)->n_cols + (sys) - DATA_N_SYS_COLS)
+#endif /* UNIV_DEBUG */
+/********************************************************************//**
+Gets the given system column number of a table.
+@return	column number */
+UNIV_INLINE
+ulint
+dict_table_get_sys_col_no(
+/*======================*/
+	const dict_table_t*	table,	/*!< in: table */
+	ulint			sys)	/*!< in: DATA_ROW_ID, ... */
+	__attribute__((nonnull, warn_unused_result));
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+Returns the minimum data size of an index record.
+@return	minimum data size in bytes */
+UNIV_INLINE
+ulint
+dict_index_get_min_size(
+/*====================*/
+	const dict_index_t*	index)	/*!< in: index */
+	__attribute__((nonnull, warn_unused_result));
+#endif /* !UNIV_HOTBACKUP */
+/********************************************************************//**
+Check whether the table uses the compact page format.
+@return	TRUE if table uses the compact page format */
+UNIV_INLINE
+ibool
+dict_table_is_comp(
+/*===============*/
+	const dict_table_t*	table)	/*!< in: table */
+	__attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Determine the file format of a table.
+@return	file format version */
+UNIV_INLINE
+ulint
+dict_table_get_format(
+/*==================*/
+	const dict_table_t*	table)	/*!< in: table */
+	__attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Determine the file format from a dict_table_t::flags.
+@return	file format version */
+UNIV_INLINE
+ulint
+dict_tf_get_format(
+/*===============*/
+	ulint		flags)		/*!< in: dict_table_t::flags */
+	__attribute__((warn_unused_result));
+/********************************************************************//**
+Sets the various values in the dict_table_t::flags word that flags
+points to. */
+UNIV_INLINE
+void
+dict_tf_set(
+/*========*/
+	ulint*		flags,		/*!< in/out: table flags */
+	rec_format_t	format,		/*!< in: file format */
+	ulint		zip_ssize,	/*!< in: zip shift size */
+	bool		remote_path)	/*!< in: table uses DATA DIRECTORY */
+	__attribute__((nonnull));
+/********************************************************************//**
+Convert a 32 bit integer table flags to the 32 bit integer that is
+written into the tablespace header at the offset FSP_SPACE_FLAGS and is
+also stored in the fil_space_t::flags field.  The following chart shows
+the translation of the low order bit.  Other bits are the same.
+========================= Low order bit ==========================
+                    | REDUNDANT | COMPACT | COMPRESSED | DYNAMIC
+dict_table_t::flags |     0     |    1    |     1      |    1
+fil_space_t::flags  |     0     |    0    |     1      |    1
+==================================================================
+@return	tablespace flags (fil_space_t::flags) */
+UNIV_INLINE
+ulint
+dict_tf_to_fsp_flags(
+/*=================*/
+	ulint	flags)	/*!< in: dict_table_t::flags */
+	__attribute__((const));
+/********************************************************************//**
+Extract the compressed page size from table flags.
+@return	compressed page size, or 0 if not compressed */
+UNIV_INLINE
+ulint
+dict_tf_get_zip_size(
+/*=================*/
+	ulint	flags)			/*!< in: flags */
+	__attribute__((const));
+/********************************************************************//**
+Check whether the table uses the compressed compact page format.
+@return	compressed page size, or 0 if not compressed */
+UNIV_INLINE
+ulint
+dict_table_zip_size(
+/*================*/
+	const dict_table_t*	table)	/*!< in: table */
+	__attribute__((nonnull, warn_unused_result));
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+Obtain exclusive locks on all index trees of the table. This is to prevent
+accessing index trees while InnoDB is updating internal metadata for
+operations such as truncate tables. */
+UNIV_INLINE
+void
+dict_table_x_lock_indexes(
+/*======================*/
+	dict_table_t*	table)	/*!< in: table */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Release the exclusive locks on all index trees of the table. */
+UNIV_INLINE
+void
+dict_table_x_unlock_indexes(
+/*========================*/
+	dict_table_t*	table)	/*!< in: table */
+	__attribute__((nonnull));
+/********************************************************************//**
+Checks if a column is in the ordering columns of the clustered index of a
+table. Column prefixes are treated like whole columns.
+@return	TRUE if the column, or its prefix, is in the clustered key */
+UNIV_INTERN
+ibool
+dict_table_col_in_clustered_key(
+/*============================*/
+	const dict_table_t*	table,	/*!< in: table */
+	ulint			n)	/*!< in: column number */
+	__attribute__((nonnull, warn_unused_result));
+/*******************************************************************//**
+Check if the table has an FTS index.
+@return TRUE if table has an FTS index */
+UNIV_INLINE
+ibool
+dict_table_has_fts_index(
+/*=====================*/
+	dict_table_t*   table)		/*!< in: table */
+	__attribute__((nonnull, warn_unused_result));
+/*******************************************************************//**
+Copies types of columns contained in table to tuple and sets all
+fields of the tuple to the SQL NULL value.  This function should
+be called right after dtuple_create(). */
+UNIV_INTERN
+void
+dict_table_copy_types(
+/*==================*/
+	dtuple_t*		tuple,	/*!< in/out: data tuple */
+	const dict_table_t*	table)	/*!< in: table */
+	__attribute__((nonnull));
+/********************************************************************//**
+Wait until all the background threads of the given table have exited, i.e.,
+bg_threads == 0. Note: bg_threads_mutex must be reserved when
+calling this. */
+UNIV_INTERN
+void
+dict_table_wait_for_bg_threads_to_exit(
+/*===================================*/
+	dict_table_t*	table,	/*!< in: table */
+	ulint		delay)	/*!< in: time in microseconds to wait between
+				checks of bg_threads. */
+	__attribute__((nonnull));
+/**********************************************************************//**
+Looks for an index with the given id. NOTE that we do not reserve
+the dictionary mutex: this function is for emergency purposes like
+printing info of a corrupt database page!
+@return	index or NULL if not found from cache */
+UNIV_INTERN
+dict_index_t*
+dict_index_find_on_id_low(
+/*======================*/
+	index_id_t	id)	/*!< in: index id */
+	__attribute__((warn_unused_result));
+/**********************************************************************//**
+Make room in the table cache by evicting an unused table. The unused table
+should not be part of FK relationship and currently not used in any user
+transaction. There is no guarantee that it will remove a table.
+@return number of tables evicted. */
+UNIV_INTERN
+ulint
+dict_make_room_in_cache(
+/*====================*/
+	ulint		max_tables,	/*!< in: max tables allowed in cache */
+	ulint		pct_check);	/*!< in: max percent to check */
+/**********************************************************************//**
+Adds an index to the dictionary cache.
+@return	DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */
+UNIV_INTERN
+dberr_t
+dict_index_add_to_cache(
+/*====================*/
+	dict_table_t*	table,	/*!< in: table on which the index is */
+	dict_index_t*	index,	/*!< in, own: index; NOTE! The index memory
+				object is freed in this function! */
+	ulint		page_no,/*!< in: root page number of the index */
+	ibool		strict)	/*!< in: TRUE=refuse to create the index
+				if records could be too big to fit in
+				a B-tree page */
+	__attribute__((nonnull, warn_unused_result));
+/**********************************************************************//**
+Removes an index from the dictionary cache. */
+UNIV_INTERN
+void
+dict_index_remove_from_cache(
+/*=========================*/
+	dict_table_t*	table,	/*!< in/out: table */
+	dict_index_t*	index)	/*!< in, own: index */
+	__attribute__((nonnull));
+#endif /* !UNIV_HOTBACKUP */
+/********************************************************************//**
+Gets the number of fields in the internal representation of an index,
+including fields added by the dictionary system.
+@return	number of fields */
+UNIV_INLINE
+ulint
+dict_index_get_n_fields(
+/*====================*/
+	const dict_index_t*	index)	/*!< in: an internal
+					representation of index (in
+					the dictionary cache) */
+	__attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Gets the number of fields in the internal representation of an index
+that uniquely determine the position of an index entry in the index, if
+we do not take multiversioning into account: in the B-tree use the value
+returned by dict_index_get_n_unique_in_tree.
+@return	number of fields */
+UNIV_INLINE
+ulint
+dict_index_get_n_unique(
+/*====================*/
+	const dict_index_t*	index)	/*!< in: an internal representation
+					of index (in the dictionary cache) */
+	__attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Gets the number of fields in the internal representation of an index
+which uniquely determine the position of an index entry in the index, if
+we also take multiversioning into account.
+@return	number of fields */
+UNIV_INLINE
+ulint
+dict_index_get_n_unique_in_tree(
+/*============================*/
+	const dict_index_t*	index)	/*!< in: an internal representation
+					of index (in the dictionary cache) */
+	__attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Gets the number of user-defined ordering fields in the index. In the internal
+representation we add the row id to the ordering fields to make all indexes
+unique, but this function returns the number of fields the user defined
+in the index as ordering fields.
+@return	number of fields */
+UNIV_INLINE
+ulint
+dict_index_get_n_ordering_defined_by_user(
+/*======================================*/
+	const dict_index_t*	index)	/*!< in: an internal representation
+					of index (in the dictionary cache) */
+	__attribute__((nonnull, warn_unused_result));
+#ifdef UNIV_DEBUG
+/********************************************************************//**
+Gets the nth field of an index.
+@return	pointer to field object */
+UNIV_INLINE
+dict_field_t*
+dict_index_get_nth_field(
+/*=====================*/
+	const dict_index_t*	index,	/*!< in: index */
+	ulint			pos)	/*!< in: position of field */
+	__attribute__((nonnull, warn_unused_result));
+#else /* UNIV_DEBUG */
+# define dict_index_get_nth_field(index, pos) ((index)->fields + (pos))
+#endif /* UNIV_DEBUG */
+/********************************************************************//**
+Gets pointer to the nth column in an index.
+@return	column */
+UNIV_INLINE
+const dict_col_t*
+dict_index_get_nth_col(
+/*===================*/
+	const dict_index_t*	index,	/*!< in: index */
+	ulint			pos)	/*!< in: position of the field */
+	__attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Gets the column number of the nth field in an index.
+@return	column number */
+UNIV_INLINE
+ulint
+dict_index_get_nth_col_no(
+/*======================*/
+	const dict_index_t*	index,	/*!< in: index */
+	ulint			pos)	/*!< in: position of the field */
+	__attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Looks for column n in an index.
+@return position in internal representation of the index;
+ULINT_UNDEFINED if not contained */
+UNIV_INLINE
+ulint
+dict_index_get_nth_col_pos(
+/*=======================*/
+	const dict_index_t*	index,	/*!< in: index */
+	ulint			n)	/*!< in: column number */
+	__attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Looks for column n in an index.
+@return position in internal representation of the index;
+ULINT_UNDEFINED if not contained */
+UNIV_INTERN
+ulint
+dict_index_get_nth_col_or_prefix_pos(
+/*=================================*/
+	const dict_index_t*	index,		/*!< in: index */
+	ulint			n,		/*!< in: column number */
+	ibool			inc_prefix)	/*!< in: TRUE=consider
+						column prefixes too */
+	__attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Returns TRUE if the index contains a column or a prefix of that column.
+@return	TRUE if contains the column or its prefix */
+UNIV_INTERN
+ibool
+dict_index_contains_col_or_prefix(
+/*==============================*/
+	const dict_index_t*	index,	/*!< in: index */
+	ulint			n)	/*!< in: column number */
+	__attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Looks for a matching field in an index. The column has to be the same. The
+column in index must be complete, or must contain a prefix longer than the
+column in index2. That is, we must be able to construct the prefix in index2
+from the prefix in index.
+@return position in internal representation of the index;
+ULINT_UNDEFINED if not contained */
+UNIV_INTERN
+ulint
+dict_index_get_nth_field_pos(
+/*=========================*/
+	const dict_index_t*	index,	/*!< in: index from which to search */
+	const dict_index_t*	index2,	/*!< in: index */
+	ulint			n)	/*!< in: field number in index2 */
+	__attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Looks for column n position in the clustered index.
+@return	position in internal representation of the clustered index */
+UNIV_INTERN
+ulint
+dict_table_get_nth_col_pos(
+/*=======================*/
+	const dict_table_t*	table,	/*!< in: table */
+	ulint			n)	/*!< in: column number */
+	__attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Returns the position of a system column in an index.
+@return	position, ULINT_UNDEFINED if not contained */
+UNIV_INLINE
+ulint
+dict_index_get_sys_col_pos(
+/*=======================*/
+	const dict_index_t*	index,	/*!< in: index */
+	ulint			type)	/*!< in: DATA_ROW_ID, ... */
+	__attribute__((nonnull, warn_unused_result));
+/*******************************************************************//**
+Adds a column to index. */
+UNIV_INTERN
+void
+dict_index_add_col(
+/*===============*/
+	dict_index_t*		index,		/*!< in/out: index */
+	const dict_table_t*	table,		/*!< in: table */
+	dict_col_t*		col,		/*!< in: column */
+	ulint			prefix_len)	/*!< in: column prefix length */
+	__attribute__((nonnull));
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
+Copies types of fields contained in index to tuple. */
+UNIV_INTERN
+void
+dict_index_copy_types(
+/*==================*/
+	dtuple_t*		tuple,		/*!< in/out: data tuple */
+	const dict_index_t*	index,		/*!< in: index */
+	ulint			n_fields)	/*!< in: number of
+						field types to copy */
+	__attribute__((nonnull));
+#endif /* !UNIV_HOTBACKUP */
+/*********************************************************************//**
+Gets the field column.
+@return	field->col, pointer to the table column */
+UNIV_INLINE
+const dict_col_t*
+dict_field_get_col(
+/*===============*/
+	const dict_field_t*	field)	/*!< in: index field */
+	__attribute__((nonnull, warn_unused_result));
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Returns an index object if it is found in the dictionary cache.
+Assumes that dict_sys->mutex is already being held.
+@return	index, NULL if not found */
+UNIV_INTERN
+dict_index_t*
+dict_index_get_if_in_cache_low(
+/*===========================*/
+	index_id_t	index_id)	/*!< in: index id */
+	__attribute__((warn_unused_result));
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+/**********************************************************************//**
+Returns an index object if it is found in the dictionary cache.
+@return	index, NULL if not found */
+UNIV_INTERN
+dict_index_t*
+dict_index_get_if_in_cache(
+/*=======================*/
+	index_id_t	index_id)	/*!< in: index id */
+	__attribute__((warn_unused_result));
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+#ifdef UNIV_DEBUG
+/**********************************************************************//**
+Checks that a tuple has n_fields_cmp value in a sensible range, so that
+no comparison can occur with the page number field in a node pointer.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+dict_index_check_search_tuple(
+/*==========================*/
+	const dict_index_t*	index,	/*!< in: index tree */
+	const dtuple_t*		tuple)	/*!< in: tuple used in a search */
+	__attribute__((nonnull, warn_unused_result));
+/** Whether and when dict_table_check_for_dup_indexes() allows temporary index names */
+enum check_name {
+	/** Require all indexes to be complete. */
+	CHECK_ALL_COMPLETE,
+	/** Allow aborted online index creation. */
+	CHECK_ABORTED_OK,
+	/** Allow partial indexes to exist. */
+	CHECK_PARTIAL_OK
+};
+/**********************************************************************//**
+Check for duplicate index entries in a table [using the index name] */
+UNIV_INTERN
+void
+dict_table_check_for_dup_indexes(
+/*=============================*/
+	const dict_table_t*	table,	/*!< in: Check for dup indexes
+					in this table */
+	enum check_name		check)	/*!< in: whether and when to allow
+					temporary index names */
+	__attribute__((nonnull));
+#endif /* UNIV_DEBUG */
+/**********************************************************************//**
+Builds a node pointer out of a physical record and a page number.
+@return	own: node pointer */
+UNIV_INTERN
+dtuple_t*
+dict_index_build_node_ptr(
+/*======================*/
+	const dict_index_t*	index,	/*!< in: index */
+	const rec_t*		rec,	/*!< in: record for which to build node
+					pointer */
+	ulint			page_no,/*!< in: page number to put in node
+					pointer */
+	mem_heap_t*		heap,	/*!< in: memory heap where pointer
+					created */
+	ulint			level)	/*!< in: level of rec in tree:
+					0 means leaf level */
+	__attribute__((nonnull, warn_unused_result));
+/**********************************************************************//**
+Copies an initial segment of a physical record, long enough to specify an
+index entry uniquely.
+@return	pointer to the prefix record */
+UNIV_INTERN
+rec_t*
+dict_index_copy_rec_order_prefix(
+/*=============================*/
+	const dict_index_t*	index,	/*!< in: index */
+	const rec_t*		rec,	/*!< in: record for which to
+					copy prefix */
+	ulint*			n_fields,/*!< out: number of fields copied */
+	byte**			buf,	/*!< in/out: memory buffer for the
+					copied prefix, or NULL */
+	ulint*			buf_size)/*!< in/out: buffer size */
+	__attribute__((nonnull, warn_unused_result));
+/**********************************************************************//**
+Builds a typed data tuple out of a physical record.
+@return	own: data tuple */
+UNIV_INTERN
+dtuple_t*
+dict_index_build_data_tuple(
+/*========================*/
+	dict_index_t*	index,	/*!< in: index */
+	rec_t*		rec,	/*!< in: record for which to build data tuple */
+	ulint		n_fields,/*!< in: number of data fields */
+	mem_heap_t*	heap)	/*!< in: memory heap where tuple created */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Gets the space id of the root of the index tree.
+@return	space id */
+UNIV_INLINE
+ulint
+dict_index_get_space(
+/*=================*/
+	const dict_index_t*	index)	/*!< in: index */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Sets the space id of the root of the index tree. */
+UNIV_INLINE
+void
+dict_index_set_space(
+/*=================*/
+	dict_index_t*	index,	/*!< in/out: index */
+	ulint		space)	/*!< in: space id */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Gets the page number of the root of the index tree.
+@return	page number */
+UNIV_INLINE
+ulint
+dict_index_get_page(
+/*================*/
+	const dict_index_t*	tree)	/*!< in: index */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Gets the read-write lock of the index tree.
+@return	read-write lock */
+UNIV_INLINE
+rw_lock_t*
+dict_index_get_lock(
+/*================*/
+	dict_index_t*	index)	/*!< in: index */
+	__attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Returns free space reserved for future updates of records. This is
+relevant only in the case of many consecutive inserts, as updates
+which make the records bigger might fragment the index.
+@return	number of free bytes on page, reserved for updates */
+UNIV_INLINE
+ulint
+dict_index_get_space_reserve(void);
+/*==============================*/
+
+/* Online index creation @{ */
+/********************************************************************//**
+Gets the status of online index creation.
+@return the status */
+UNIV_INLINE
+enum online_index_status
+dict_index_get_online_status(
+/*=========================*/
+	const dict_index_t*	index)	/*!< in: secondary index */
+	__attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Sets the status of online index creation. */
+UNIV_INLINE
+void
+dict_index_set_online_status(
+/*=========================*/
+	dict_index_t*			index,	/*!< in/out: index */
+	enum online_index_status	status)	/*!< in: status */
+	__attribute__((nonnull));
+/********************************************************************//**
+Determines if a secondary index is being or has been created online,
+or if the table is being rebuilt online, allowing concurrent modifications
+to the table.
+@retval true if the index is being or has been built online, or
+if this is a clustered index and the table is being or has been rebuilt online
+@retval false if the index has been created or the table has been
+rebuilt completely */
+UNIV_INLINE
+bool
+dict_index_is_online_ddl(
+/*=====================*/
+	const dict_index_t*	index)	/*!< in: index */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Calculates the minimum record length in an index. */
+UNIV_INTERN
+ulint
+dict_index_calc_min_rec_len(
+/*========================*/
+	const dict_index_t*	index)	/*!< in: index */
+	__attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Reserves the dictionary system mutex for MySQL. */
+UNIV_INTERN
+void
+dict_mutex_enter_for_mysql(void);
+/*============================*/
+/********************************************************************//**
+Releases the dictionary system mutex for MySQL. */
+UNIV_INTERN
+void
+dict_mutex_exit_for_mysql(void);
+/*===========================*/
+
+/** Create a dict_table_t's stats latch or delay for lazy creation.
+This function is only called from either single threaded environment
+or from a thread that has not shared the table object with other threads.
+@param[in,out]	table	table whose stats latch to create
+@param[in]	enabled	if false then the latch is disabled
+and dict_table_stats_lock()/unlock() become noop on this table. */
+
+void
+dict_table_stats_latch_create(
+	dict_table_t*	table,
+	bool		enabled);
+
+/** Destroy a dict_table_t's stats latch.
+This function is only called from either single threaded environment
+or from a thread that has not shared the table object with other threads.
+@param[in,out]	table	table whose stats latch to destroy */
+
+void
+dict_table_stats_latch_destroy(
+	dict_table_t*	table);
+
+/**********************************************************************//**
+Lock the appropriate latch to protect a given table's statistics.
+table->id is used to pick the corresponding latch from a global array of
+latches. */
+UNIV_INTERN
+void
+dict_table_stats_lock(
+/*==================*/
+	dict_table_t*	table,		/*!< in: table */
+	ulint		latch_mode);	/*!< in: RW_S_LATCH or RW_X_LATCH */
+/**********************************************************************//**
+Unlock the latch that has been locked by dict_table_stats_lock() */
+UNIV_INTERN
+void
+dict_table_stats_unlock(
+/*====================*/
+	dict_table_t*	table,		/*!< in: table */
+	ulint		latch_mode);	/*!< in: RW_S_LATCH or RW_X_LATCH */
+/********************************************************************//**
+Checks if the database name in two table names is the same.
+@return	TRUE if same db name */
+UNIV_INTERN
+ibool
+dict_tables_have_same_db(
+/*=====================*/
+	const char*	name1,	/*!< in: table name in the form
+				dbname '/' tablename */
+	const char*	name2)	/*!< in: table name in the form
+				dbname '/' tablename */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Removes an index from the cache */
+UNIV_INTERN
+void
+dict_index_remove_from_cache(
+/*=========================*/
+	dict_table_t*	table,	/*!< in/out: table */
+	dict_index_t*	index)	/*!< in, own: index */
+	__attribute__((nonnull));
+/**********************************************************************//**
+Get index by name
+@return	index, NULL if does not exist */
+UNIV_INTERN
+dict_index_t*
+dict_table_get_index_on_name(
+/*=========================*/
+	dict_table_t*	table,	/*!< in: table */
+	const char*	name)	/*!< in: name of the index to find */
+	__attribute__((nonnull, warn_unused_result));
+/**********************************************************************//**
+In case there is more than one index with the same name return the index
+with the min(id).
+@return	index, NULL if does not exist */
+UNIV_INTERN
+dict_index_t*
+dict_table_get_index_on_name_and_min_id(
+/*====================================*/
+	dict_table_t*	table,	/*!< in: table */
+	const char*	name)	/*!< in: name of the index to find */
+	__attribute__((nonnull, warn_unused_result));
+/**********************************************************************//**
+Check whether a column exists in an FTS index.
+@return	ULINT_UNDEFINED if no match else the offset within the vector */
+UNIV_INLINE
+ulint
+dict_table_is_fts_column(
+/*=====================*/
+	ib_vector_t*	indexes,/*!< in: vector containing only FTS
+				indexes */
+	ulint		col_no)	/*!< in: col number to search for */
+	__attribute__((nonnull, warn_unused_result));
+/**********************************************************************//**
+Move a table from the LRU list to the non-LRU list. */
+UNIV_INTERN
+void
+dict_table_move_from_lru_to_non_lru(
+/*================================*/
+	dict_table_t*	table)	/*!< in: table to move from LRU to non-LRU */
+	__attribute__((nonnull));
+/**********************************************************************//**
+Move a table to the LRU list from the non-LRU list. */
+UNIV_INTERN
+void
+dict_table_move_from_non_lru_to_lru(
+/*================================*/
+	dict_table_t*	table)	/*!< in: table to move from non-LRU to LRU */
+	__attribute__((nonnull));
+/**********************************************************************//**
+Move to the most recently used segment of the LRU list. */
+UNIV_INTERN
+void
+dict_move_to_mru(
+/*=============*/
+	dict_table_t*	table)	/*!< in: table to move to MRU */
+	__attribute__((nonnull));
+
+/** Maximum number of columns in a foreign key constraint. Note that MySQL
+has a much lower limit on the number of columns allowed in a foreign key
+constraint. */
+#define MAX_NUM_FK_COLUMNS		500
+
+/* Buffers for storing detailed information about the latest foreign key
+and unique key errors */
+extern FILE*	dict_foreign_err_file;
+extern ib_mutex_t	dict_foreign_err_mutex; /* mutex protecting the buffers */
+
+/** the dictionary system */
+extern dict_sys_t*	dict_sys;
+/** the data dictionary rw-latch protecting dict_sys */
+extern rw_lock_t	dict_operation_lock;
+
+/** Dictionary system struct; the global instance is pointed to by dict_sys */
+struct dict_sys_t{
+	ib_mutex_t		mutex;		/*!< mutex protecting the data
+					dictionary; protects also the
+					disk-based dictionary system tables;
+					this mutex serializes CREATE TABLE
+					and DROP TABLE, as well as reading
+					the dictionary data for a table from
+					system tables */
+	row_id_t	row_id;		/*!< the next row id to assign;
+					NOTE that at a checkpoint this
+					must be written to the dict system
+					header and flushed to a file; in
+					recovery this must be derived from
+					the log records */
+	hash_table_t*	table_hash;	/*!< hash table of the tables, based
+					on name */
+	hash_table_t*	table_id_hash;	/*!< hash table of the tables, based
+					on id */
+	ulint		size;		/*!< varying space in bytes occupied
+					by the data dictionary table and
+					index objects */
+	dict_table_t*	sys_tables;	/*!< SYS_TABLES table */
+	dict_table_t*	sys_columns;	/*!< SYS_COLUMNS table */
+	dict_table_t*	sys_indexes;	/*!< SYS_INDEXES table */
+	dict_table_t*	sys_fields;	/*!< SYS_FIELDS table */
+
+	/* Lists of the tables in the cache, split by evictability: */
+	UT_LIST_BASE_NODE_T(dict_table_t)
+			table_LRU;	/*!< List of tables that can be evicted
+					from the cache */
+	UT_LIST_BASE_NODE_T(dict_table_t)
+			table_non_LRU;	/*!< List of tables that can't be
+					evicted from the cache */
+};
+#endif /* !UNIV_HOTBACKUP */
+
+/** dummy index for ROW_FORMAT=REDUNDANT supremum and infimum records */
+extern dict_index_t*	dict_ind_redundant;
+/** dummy index for ROW_FORMAT=COMPACT supremum and infimum records */
+extern dict_index_t*	dict_ind_compact;
+
+/**********************************************************************//**
+Inits dict_ind_redundant and dict_ind_compact. */
+UNIV_INTERN
+void
+dict_ind_init(void);
+/*===============*/
+
+/* Auxiliary structs for checking a table definition @{ */
+
+/** This struct is used to specify the name and type that a column must
+have when checking a table's schema. */
+struct dict_col_meta_t {
+	const char*	name;		/*!< column name */
+	ulint		mtype;		/*!< required column main type */
+	ulint		prtype_mask;	/*!< required column precise type mask;
+					if this is non-zero then all the
+					bits it has set must also be set
+					in the column's prtype */
+	ulint		len;		/*!< required column length */
+};
+
+/** This struct is used for checking whether a given table exists and
+whether it has a predefined schema (number of columns and column names
+and types); it is the req_schema argument of dict_table_schema_check(). */
+struct dict_table_schema_t {
+	const char*		table_name;	/*!< the name of the table whose
+						structure we are checking */
+	ulint			n_cols;		/*!< the number of columns the
+						table must have */
+	dict_col_meta_t*	columns;	/*!< metadata for the columns;
+						this array has n_cols
+						elements */
+	ulint			n_foreign;	/*!< number of foreign keys this
+						table has, pointing to other
+						tables (where this table is
+						FK child) */
+	ulint			n_referenced;	/*!< number of foreign keys other
+						tables have, pointing to this
+						table (where this table is
+						parent) */
+};
+/* @} */
+
+/*********************************************************************//**
+Checks whether a table exists and whether it has the given structure.
+The table must have the same number of columns with the same names and
+types. The order of the columns does not matter.
+The caller must own the dictionary mutex.
+dict_table_schema_check() @{
+@return DB_SUCCESS if the table exists and contains the necessary columns */
+UNIV_INTERN
+dberr_t
+dict_table_schema_check(
+/*====================*/
+	dict_table_schema_t*	req_schema,	/*!< in/out: required table
+						schema */
+	char*			errstr,		/*!< out: human readable error
+						message if != DB_SUCCESS and
+						!= DB_TABLE_NOT_FOUND is
+						returned */
+	size_t			errstr_sz)	/*!< in: errstr size */
+	__attribute__((nonnull, warn_unused_result));
+/* @} */
+
+/*********************************************************************//**
+Converts a database and table name from filesystem encoding
+(e.g. d@i1b/a@q1b@1Kc, same format as used in dict_table_t::name) into two
+strings in UTF8 encoding (e.g. dцb and aюbØc). The output buffers must be
+at least MAX_DB_UTF8_LEN and MAX_TABLE_UTF8_LEN bytes. */
+UNIV_INTERN
+void
+dict_fs2utf8(
+/*=========*/
+	const char*	db_and_table,	/*!< in: database and table names,
+					e.g. d@i1b/a@q1b@1Kc */
+	char*		db_utf8,	/*!< out: database name, e.g. dцb */
+	size_t		db_utf8_size,	/*!< in: db_utf8 size */
+	char*		table_utf8,	/*!< out: table name, e.g. aюbØc */
+	size_t		table_utf8_size)/*!< in: table_utf8 size */
+	__attribute__((nonnull));
+
+/**********************************************************************//**
+Closes the data dictionary module. */
+UNIV_INTERN
+void
+dict_close(void);
+/*============*/
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Check whether the table is corrupted.
+@return	nonzero for corrupted table, zero for valid tables */
+UNIV_INLINE
+ulint
+dict_table_is_corrupted(
+/*====================*/
+	const dict_table_t*	table)	/*!< in: table */
+	__attribute__((nonnull, warn_unused_result));
+
+/**********************************************************************//**
+Check whether the index is corrupted.
+@return	nonzero for corrupted index, zero for valid indexes */
+UNIV_INLINE
+ulint
+dict_index_is_corrupted(
+/*====================*/
+	const dict_index_t*	index)	/*!< in: index */
+	__attribute__((nonnull, warn_unused_result));
+
+#endif /* !UNIV_HOTBACKUP */
+/**********************************************************************//**
+Flags an index and table corrupted both in the data dictionary cache
+and in the system table SYS_INDEXES. */
+UNIV_INTERN
+void
+dict_set_corrupted(
+/*===============*/
+	dict_index_t*	index,	/*!< in/out: index */
+	trx_t*		trx,	/*!< in/out: transaction */
+	const char*	ctx)	/*!< in: context */
+	UNIV_COLD __attribute__((nonnull));
+
+/**********************************************************************//**
+Flags an index corrupted in the data dictionary cache only. This
+is used mostly to mark a corrupted index when the index's own dictionary
+is corrupted, and we force loading such an index for repair purposes. */
+UNIV_INTERN
+void
+dict_set_corrupted_index_cache_only(
+/*================================*/
+	dict_index_t*	index,		/*!< in/out: index */
+	dict_table_t*	table)		/*!< in/out: table */
+	__attribute__((nonnull));
+
+/**********************************************************************//**
+Flags a table with specified space_id corrupted in the table dictionary
+cache.
+@return TRUE if successful */
+UNIV_INTERN
+ibool
+dict_set_corrupted_by_space(
+/*========================*/
+	ulint		space_id);	/*!< in: space ID */
+
+/********************************************************************//**
+Validate the table flags.
+@return	true if valid. */
+UNIV_INLINE
+bool
+dict_tf_is_valid(
+/*=============*/
+	ulint		flags)		/*!< in: table flags */
+	__attribute__((warn_unused_result));
+
+/********************************************************************//**
+Check if the tablespace for the table has been discarded.
+@return	true if the tablespace has been discarded. */
+UNIV_INLINE
+bool
+dict_table_is_discarded(
+/*====================*/
+	const dict_table_t*	table)	/*!< in: table to check */
+	__attribute__((nonnull, pure, warn_unused_result));
+
+/********************************************************************//**
+Check if it is a temporary table.
+@return	true if temporary table flag is set. */
+UNIV_INLINE
+bool
+dict_table_is_temporary(
+/*====================*/
+	const dict_table_t*	table)	/*!< in: table to check */
+	__attribute__((nonnull, pure, warn_unused_result));
+
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+This function should be called whenever a page is successfully
+compressed. Updates the compression padding information. */
+UNIV_INTERN
+void
+dict_index_zip_success(
+/*===================*/
+	dict_index_t*	index)	/*!< in/out: index to be updated. */
+	__attribute__((nonnull));
+/*********************************************************************//**
+This function should be called whenever a page compression attempt
+fails. Updates the compression padding information. */
+UNIV_INTERN
+void
+dict_index_zip_failure(
+/*===================*/
+	dict_index_t*	index)	/*!< in/out: index to be updated. */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Return the optimal page size, for which a page will likely compress.
+@return page size beyond which a page might not compress */
+UNIV_INTERN
+ulint
+dict_index_zip_pad_optimal_page_size(
+/*=================================*/
+	dict_index_t*	index)	/*!< in: index for which page size
+				is requested */
+	__attribute__((nonnull, warn_unused_result));
+/*************************************************************//**
+Convert table flag to row format string.
+@return row format name */
+UNIV_INTERN
+const char*
+dict_tf_to_row_format_string(
+/*=========================*/
+	ulint	table_flag);		/*!< in: row format setting */
+/*****************************************************************//**
+Get the index whose first field is the given table column.
+@return index whose first field matches the column at position
+col_index of the table */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_index_on_first_col(
+/*==============================*/
+	const dict_table_t*	table,		/*!< in: table */
+	ulint			col_index);	/*!< in: position of column
+						in table */
+
+#endif /* !UNIV_HOTBACKUP */
+
+#ifndef UNIV_NONINL
+#include "dict0dict.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic
new file mode 100644
index 00000000000..066ffe47e4a
--- /dev/null
+++ b/storage/innobase/include/dict0dict.ic
@@ -0,0 +1,1433 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/dict0dict.ic
+Data dictionary system
+
+Created 1/8/1996 Heikki Tuuri
+***********************************************************************/
+
+#include "data0type.h"
+#ifndef UNIV_HOTBACKUP
+#include "dict0load.h"
+#include "rem0types.h"
+#include "fsp0fsp.h"
+#include "srv0srv.h"
+#include "sync0rw.h" /* RW_S_LATCH */
+
+/*********************************************************************//**
+Gets the minimum number of bytes per character, decoded from col->mbminmaxlen.
+@return minimum multi-byte char size, in bytes */
+UNIV_INLINE
+ulint
+dict_col_get_mbminlen(
+/*==================*/
+	const dict_col_t*	col)	/*!< in: column */
+{
+	return(DATA_MBMINLEN(col->mbminmaxlen));
+}
+/*********************************************************************//**
+Gets the maximum number of bytes per character, decoded from col->mbminmaxlen.
+@return maximum multi-byte char size, in bytes */
+UNIV_INLINE
+ulint
+dict_col_get_mbmaxlen(
+/*==================*/
+	const dict_col_t*	col)	/*!< in: column */
+{
+	return(DATA_MBMAXLEN(col->mbminmaxlen));
+}
+/*********************************************************************//**
+Sets the minimum and maximum number of bytes per character (col->mbminmaxlen). */
+UNIV_INLINE
+void
+dict_col_set_mbminmaxlen(
+/*=====================*/
+	dict_col_t*	col,		/*!< in/out: column */
+	ulint		mbminlen,	/*!< in: minimum multi-byte
+					character size, in bytes */
+	ulint		mbmaxlen)	/*!< in: maximum multi-byte
+					character size, in bytes */
+{
+	ut_ad(mbminlen < DATA_MBMAX);
+	ut_ad(mbmaxlen < DATA_MBMAX);
+	ut_ad(mbminlen <= mbmaxlen);
+
+	col->mbminmaxlen = DATA_MBMINMAXLEN(mbminlen, mbmaxlen);
+}
+/*********************************************************************//**
+Copies the column data type (mtype, prtype, len, mbminmaxlen) to type. */
+UNIV_INLINE
+void
+dict_col_copy_type(
+/*===============*/
+	const dict_col_t*	col,	/*!< in: column */
+	dtype_t*		type)	/*!< out: data type */
+{
+	ut_ad(col && type);
+
+	type->mtype = col->mtype;
+	type->prtype = col->prtype;
+	type->len = col->len;
+	type->mbminmaxlen = col->mbminmaxlen;
+}
+#endif /* !UNIV_HOTBACKUP */
+
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Assert that a column and a data type match (the lengths are not compared).
+@return	TRUE */
+UNIV_INLINE
+ibool
+dict_col_type_assert_equal(
+/*=======================*/
+	const dict_col_t*	col,	/*!< in: column */
+	const dtype_t*		type)	/*!< in: data type */
+{
+	ut_ad(col);
+	ut_ad(type);
+
+	ut_ad(col->mtype == type->mtype);
+	ut_ad(col->prtype == type->prtype);
+	/* disabled check: ut_ad(col->len == type->len); */
+# ifndef UNIV_HOTBACKUP
+	ut_ad(col->mbminmaxlen == type->mbminmaxlen);
+# endif /* !UNIV_HOTBACKUP */
+
+	return(TRUE);
+}
+#endif /* UNIV_DEBUG */
+
+#ifndef UNIV_HOTBACKUP
+/***********************************************************************//**
+Returns the minimum size of the column, via dtype_get_min_size_low().
+@return	minimum size */
+UNIV_INLINE
+ulint
+dict_col_get_min_size(
+/*==================*/
+	const dict_col_t*	col)	/*!< in: column */
+{
+	return(dtype_get_min_size_low(col->mtype, col->prtype, col->len,
+				      col->mbminmaxlen));
+}
+/***********************************************************************//**
+Returns the maximum size of the column, via dtype_get_max_size_low().
+@return	maximum size */
+UNIV_INLINE
+ulint
+dict_col_get_max_size(
+/*==================*/
+	const dict_col_t*	col)	/*!< in: column */
+{
+	return(dtype_get_max_size_low(col->mtype, col->len));
+}
+#endif /* !UNIV_HOTBACKUP */
+/***********************************************************************//**
+Returns the size of a fixed size column, 0 if not a fixed size column.
+@return	fixed size, or 0; the value comes from dtype_get_fixed_size_low() */
+UNIV_INLINE
+ulint
+dict_col_get_fixed_size(
+/*====================*/
+	const dict_col_t*	col,	/*!< in: column */
+	ulint			comp)	/*!< in: nonzero=ROW_FORMAT=COMPACT */
+{
+	return(dtype_get_fixed_size_low(col->mtype, col->prtype, col->len,
+					col->mbminmaxlen, comp));
+}
+/***********************************************************************//**
+Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column: the
+fixed length of the type as given by dict_col_get_fixed_size(), otherwise 0.
+@return	SQL null storage size in ROW_FORMAT=REDUNDANT */
+UNIV_INLINE
+ulint
+dict_col_get_sql_null_size(
+/*=======================*/
+	const dict_col_t*	col,	/*!< in: column */
+	ulint			comp)	/*!< in: nonzero=ROW_FORMAT=COMPACT  */
+{
+	return(dict_col_get_fixed_size(col, comp));
+}
+
+/*********************************************************************//**
+Gets the column number, i.e. the value stored in col->ind.
+@return	col->ind, table column position (starting from 0) */
+UNIV_INLINE
+ulint
+dict_col_get_no(
+/*============*/
+	const dict_col_t*	col)	/*!< in: column */
+{
+	ut_ad(col);
+
+	return(col->ind);
+}
+
+/*********************************************************************//**
+Gets the position of the first non-prefix field on col in the clustered index. */
+UNIV_INLINE
+ulint
+dict_col_get_clust_pos(
+/*===================*/
+	const dict_col_t*	col,		/*!< in: table column */
+	const dict_index_t*	clust_index)	/*!< in: clustered index */
+{
+	ulint	i;
+
+	ut_ad(col);
+	ut_ad(clust_index);
+	ut_ad(dict_index_is_clust(clust_index));
+
+	for (i = 0; i < clust_index->n_def; i++) {
+		const dict_field_t*	field = &clust_index->fields[i];
+
+		if (!field->prefix_len && field->col == col) {
+			return(i);
+		}
+	}
+	/* No field with prefix_len == 0 refers to col. */
+	return(ULINT_UNDEFINED);
+}
+
+#ifndef UNIV_HOTBACKUP
+#ifdef UNIV_DEBUG
+/********************************************************************//**
+Gets the first index on the table (the clustered index).
+@return	index, NULL if none exists */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_first_index(
+/*=======================*/
+	const dict_table_t*	table)	/*!< in: table */
+{
+	ut_ad(table);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+	return(UT_LIST_GET_FIRST((const_cast<dict_table_t*>(table))->indexes));
+}
+
+/********************************************************************//**
+Gets the last index on the table (the last element of table->indexes).
+@return	index, NULL if none exists */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_last_index(
+/*=======================*/
+	const dict_table_t*	table)	/*!< in: table */
+{
+	ut_ad(table);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+	return(UT_LIST_GET_LAST((const_cast<dict_table_t*>(table))
+				->indexes));
+}
+
+/********************************************************************//**
+Gets the next index on the table (the successor of index in the indexes list).
+@return	index, NULL if none left */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_next_index(
+/*======================*/
+	const dict_index_t*	index)	/*!< in: index */
+{
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	return(UT_LIST_GET_NEXT(indexes, const_cast<dict_index_t*>(index)));
+}
+#endif /* UNIV_DEBUG */
+#endif /* !UNIV_HOTBACKUP */
+
+/********************************************************************//**
+Check whether the index is the clustered index (DICT_CLUSTERED in index->type).
+@return	nonzero for clustered index, zero for other indexes */
+UNIV_INLINE
+ulint
+dict_index_is_clust(
+/*================*/
+	const dict_index_t*	index)	/*!< in: index */
+{
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	return(index->type & DICT_CLUSTERED);
+}
+/********************************************************************//**
+Check whether the index is unique (DICT_UNIQUE set in index->type).
+@return	nonzero for unique index, zero for other indexes */
+UNIV_INLINE
+ulint
+dict_index_is_unique(
+/*=================*/
+	const dict_index_t*	index)	/*!< in: index */
+{
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	return(index->type & DICT_UNIQUE);
+}
+
+/********************************************************************//**
+Check whether the index is the insert buffer tree (DICT_IBUF in index->type).
+@return	nonzero for insert buffer, zero for other indexes */
+UNIV_INLINE
+ulint
+dict_index_is_ibuf(
+/*===============*/
+	const dict_index_t*	index)	/*!< in: index */
+{
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	return(index->type & DICT_IBUF);
+}
+
+/********************************************************************//**
+Check whether the index is a universal index tree (DICT_UNIVERSAL is set).
+@return	nonzero for universal tree, zero for other indexes */
+UNIV_INLINE
+ulint
+dict_index_is_univ(
+/*===============*/
+	const dict_index_t*	index)	/*!< in: index */
+{
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	return(index->type & DICT_UNIVERSAL);
+}
+
+/********************************************************************//**
+Check whether the index is a secondary index or the insert buffer tree.
+@return	nonzero for secondary index or insert buffer, zero for other indexes */
+UNIV_INLINE
+ulint
+dict_index_is_sec_or_ibuf(
+/*======================*/
+	const dict_index_t*	index)	/*!< in: index */
+{
+	ulint	type;
+
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	type = index->type;
+
+	return(!(type & DICT_CLUSTERED) || (type & DICT_IBUF));
+}
+
+/********************************************************************//**
+Gets the number of user-defined columns in a table in the dictionary
+cache, that is, table->n_cols minus the DATA_N_SYS_COLS system columns.
+@return	number of user-defined (e.g., not ROW_ID) columns of a table */
+UNIV_INLINE
+ulint
+dict_table_get_n_user_cols(
+/*=======================*/
+	const dict_table_t*	table)	/*!< in: table; must not be NULL */
+{
+	ut_ad(table);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+	return(table->n_cols - DATA_N_SYS_COLS);
+}
+
+/********************************************************************//**
+Gets the number of system columns in a table in the dictionary cache.
+@return	number of system (e.g., ROW_ID) columns; always DATA_N_SYS_COLS */
+UNIV_INLINE
+ulint
+dict_table_get_n_sys_cols(
+/*======================*/
+	const dict_table_t*	table __attribute__((unused)))	/*!< in: table */
+{
+	/* The table is only inspected by these assertions. */
+	ut_ad(table);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+	ut_ad(table->cached);
+	return(DATA_N_SYS_COLS);
+}
+
+/********************************************************************//**
+Gets the number of all columns (also system) in a table in the dictionary
+cache.
+@return	number of columns of a table (table->n_cols) */
+UNIV_INLINE
+ulint
+dict_table_get_n_cols(
+/*==================*/
+	const dict_table_t*	table)	/*!< in: table; must not be NULL */
+{
+	ut_ad(table);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+	return(table->n_cols);
+}
+
+/********************************************************************//**
+Gets the approximately estimated number of rows in the table (stat_n_rows).
+@return	estimated number of rows */
+UNIV_INLINE
+ib_uint64_t
+dict_table_get_n_rows(
+/*==================*/
+	const dict_table_t*	table)	/*!< in: table */
+{
+	ut_ad(table->stat_initialized);	/* statistics must be initialized */
+
+	return(table->stat_n_rows);
+}
+
+/********************************************************************//**
+Add one to the estimated row count, stopping at 0xFFFFFFFFFFFFFFFF.
+No latch protects this update, so the count is only approximate. */
+UNIV_INLINE
+void
+dict_table_n_rows_inc(
+/*==================*/
+	dict_table_t*	table)	/*!< in/out: table */
+{
+	if (!table->stat_initialized) {
+		return;	/* no statistics to maintain yet */
+	}
+	const ib_uint64_t	current = table->stat_n_rows;
+	if (current < 0xFFFFFFFFFFFFFFFFULL) {
+		table->stat_n_rows = current + 1;
+	}
+}
+
+/********************************************************************//**
+Subtract one from the estimated row count, stopping at zero.
+No latch protects this update, so the count is only approximate. */
+UNIV_INLINE
+void
+dict_table_n_rows_dec(
+/*==================*/
+	dict_table_t*	table)	/*!< in/out: table */
+{
+	if (!table->stat_initialized) {
+		return;	/* no statistics to maintain yet */
+	}
+	const ib_uint64_t	current = table->stat_n_rows;
+	if (current > 0) {
+		table->stat_n_rows = current - 1;
+	}
+}
+
+#ifdef UNIV_DEBUG
+/********************************************************************//**
+Gets the nth column of a table (function form, under #ifdef UNIV_DEBUG).
+@return	pointer to column object, table->cols + pos */
+UNIV_INLINE
+dict_col_t*
+dict_table_get_nth_col(
+/*===================*/
+	const dict_table_t*	table,	/*!< in: table */
+	ulint			pos)	/*!< in: position of column;
+					must be less than table->n_def */
+{
+	ut_ad(table);
+	ut_ad(pos < table->n_def);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+	return((dict_col_t*) (table->cols) + pos);
+}
+
+/********************************************************************//**
+Gets the given system column of a table (one of its last DATA_N_SYS_COLS).
+@return	pointer to column object */
+UNIV_INLINE
+dict_col_t*
+dict_table_get_sys_col(
+/*===================*/
+	const dict_table_t*	table,	/*!< in: table */
+	ulint			sys)	/*!< in: DATA_ROW_ID, ... */
+{
+	dict_col_t*	col;
+
+	ut_ad(table);
+	ut_ad(sys < DATA_N_SYS_COLS);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+	col = dict_table_get_nth_col(table, table->n_cols
+				     - DATA_N_SYS_COLS + sys);
+	ut_ad(col->mtype == DATA_SYS);
+	ut_ad(col->prtype == (sys | DATA_NOT_NULL));
+
+	return(col);
+}
+#endif /* UNIV_DEBUG */
+
+/********************************************************************//**
+Gets the given system column number of a table.
+@return	column number, table->n_cols - DATA_N_SYS_COLS + sys */
+UNIV_INLINE
+ulint
+dict_table_get_sys_col_no(
+/*======================*/
+	const dict_table_t*	table,	/*!< in: table */
+	ulint			sys)	/*!< in: DATA_ROW_ID, ... */
+{
+	ut_ad(table);
+	ut_ad(sys < DATA_N_SYS_COLS);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+	return(table->n_cols - DATA_N_SYS_COLS + sys);
+}
+
+/********************************************************************//**
+Check whether the table uses the compact page format.
+@return	TRUE if table uses the compact page format */
+UNIV_INLINE
+ibool
+dict_table_is_comp(
+/*===============*/
+	const dict_table_t*	table)	/*!< in: table */
+{
+	ut_ad(table);
+
+#if DICT_TF_COMPACT != 1
+#error "DICT_TF_COMPACT must be 1"
+#endif /* the masked bit below is returned as the ibool result */
+
+	return(table->flags & DICT_TF_COMPACT);
+}
+
+/********************************************************************//**
+Check if the table has an FTS index (DICT_TF2_FTS flag set on the table). */
+UNIV_INLINE
+ibool
+dict_table_has_fts_index(
+/*=====================*/
+				/* out: TRUE if table has an FTS index */
+	dict_table_t*   table)  /*!< in: table */
+{
+	ut_ad(table);
+
+	return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS));
+}
+
+/********************************************************************//**
+Validate the table flags (dict_table_t::flags).
+@return	true if valid. */
+UNIV_INLINE
+bool
+dict_tf_is_valid(
+/*=============*/
+	ulint	flags)		/*!< in: table flags */
+{
+	ulint	compact = DICT_TF_GET_COMPACT(flags);
+	ulint	zip_ssize = DICT_TF_GET_ZIP_SSIZE(flags);
+	ulint	atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(flags);
+	ulint	unused = DICT_TF_GET_UNUSED(flags);
+
+	/* Make sure there are no bits that we do not know about. */
+	if (unused != 0) {
+
+		return(false);
+
+	} else if (atomic_blobs) {
+		/* Barracuda row formats COMPRESSED and DYNAMIC build on
+		the page structure introduced for the COMPACT row format
+		by allowing keys in secondary indexes to be made from
+		data stored off-page in the clustered index. */
+
+		if (!compact) {
+			return(false);
+		}
+
+	} else if (zip_ssize) {
+
+		/* Antelope does not support COMPRESSED row format. */
+		return(false);
+	}
+
+	if (zip_ssize) {
+
+		/* COMPRESSED row format must have compact and atomic_blobs
+		bits set (both are already enforced by the checks above)
+		and the shift size must be within the allowed range. */
+		if (!compact
+		    || !atomic_blobs
+		    || zip_ssize > PAGE_ZIP_SSIZE_MAX) {
+
+			return(false);
+		}
+	}
+
+	/* CREATE TABLE ... DATA DIRECTORY is supported for any row format,
+	so the DATA_DIR flag is compatible with all other table flags. */
+
+	return(true);
+}
+
+/********************************************************************//**
+Validate a SYS_TABLES TYPE field against SYS_TABLES.N_COLS and return it.
+@return	Same as input after validating it as a SYS_TABLES TYPE field.
+If there is an error, return ULINT_UNDEFINED. */
+UNIV_INLINE
+ulint
+dict_sys_tables_type_validate(
+/*==========================*/
+	ulint	type,		/*!< in: SYS_TABLES.TYPE */
+	ulint	n_cols)		/*!< in: SYS_TABLES.N_COLS */
+{
+	ulint	low_order_bit = DICT_TF_GET_COMPACT(type);
+	ulint	redundant = !(n_cols & DICT_N_COLS_COMPACT);
+	ulint	zip_ssize = DICT_TF_GET_ZIP_SSIZE(type);
+	ulint	atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(type);
+	ulint	unused = DICT_TF_GET_UNUSED(type);
+
+	/* The low order bit of SYS_TABLES.TYPE is always set to 1.
+	If the format is UNIV_FORMAT_B or higher, this field is the same
+	as dict_table_t::flags. Zero is not allowed here. */
+	if (!low_order_bit) {
+		return(ULINT_UNDEFINED);
+	}
+
+	/* A REDUNDANT table (DICT_N_COLS_COMPACT not set in N_COLS)
+	can carry neither a zip size nor the atomic BLOBs bit. */
+	if (redundant) {
+		if (zip_ssize || atomic_blobs) {
+			return(ULINT_UNDEFINED);
+		}
+	}
+	/* Make sure there are no bits that we do not know about. */
+	if (unused) {
+		return(ULINT_UNDEFINED);
+	}
+	if (atomic_blobs) {
+		/* Barracuda row formats COMPRESSED and DYNAMIC build on
+		the page structure introduced for the COMPACT row format
+		by allowing keys in secondary indexes to be made from
+		data stored off-page in the clustered index.
+
+		The DICT_N_COLS_COMPACT flag should be in N_COLS,
+		but we already know that. */
+
+	} else if (zip_ssize) {
+		/* Antelope does not support COMPRESSED format. */
+		return(ULINT_UNDEFINED);
+	}
+
+	if (zip_ssize) {
+		/* COMPRESSED row format must have low_order_bit and
+		atomic_blobs bits set and the DICT_N_COLS_COMPACT flag
+		should be in N_COLS, but we already know about the
+		low_order_bit and DICT_N_COLS_COMPACT flags. */
+		if (!atomic_blobs) {
+			return(ULINT_UNDEFINED);
+		}
+
+		/* Validate that the number is within allowed range. */
+		if (zip_ssize > PAGE_ZIP_SSIZE_MAX) {
+			return(ULINT_UNDEFINED);
+		}
+	}
+
+	/* There is nothing to validate for the data_dir field.
+	CREATE TABLE ... DATA DIRECTORY is supported for any row
+	format, so the DATA_DIR flag is compatible with any other
+	table flags. However, it is not used with TEMPORARY tables. */
+
+	/* Return the validated SYS_TABLES.TYPE. */
+	return(type);
+}
+
+/********************************************************************//**
+Determine the record (row) format from dict_table_t::flags.
+The low order bit is zero for REDUNDANT and 1 for every other format;
+the atomic BLOBs bit and the zip shift size distinguish the rest.
+@return	record format */
+UNIV_INLINE
+rec_format_t
+dict_tf_get_rec_format(
+/*===================*/
+	ulint		flags)	/*!< in: dict_table_t::flags */
+{
+	ut_a(dict_tf_is_valid(flags));
+
+	if (!DICT_TF_GET_COMPACT(flags)) {
+		return(REC_FORMAT_REDUNDANT);
+	} else if (!DICT_TF_HAS_ATOMIC_BLOBS(flags)) {
+		return(REC_FORMAT_COMPACT);
+	}
+
+	/* Only the two formats with atomic BLOBs remain; a nonzero
+	zip shift size means COMPRESSED. */
+	const ulint	zip_ssize = DICT_TF_GET_ZIP_SSIZE(flags);
+
+	return(zip_ssize
+	       ? REC_FORMAT_COMPRESSED
+	       : REC_FORMAT_DYNAMIC);
+}
+
+/********************************************************************//**
+Determine the file format from a dict_table_t::flags.
+@return	file format version: UNIV_FORMAT_B or UNIV_FORMAT_A */
+UNIV_INLINE
+ulint
+dict_tf_get_format(
+/*===============*/
+	ulint		flags)	/*!< in: dict_table_t::flags */
+{
+	/* The atomic BLOBs bit alone selects UNIV_FORMAT_B;
+	no other flag is consulted. */
+	return(DICT_TF_HAS_ATOMIC_BLOBS(flags)
+	       ? UNIV_FORMAT_B
+	       : UNIV_FORMAT_A);
+}
+
+/********************************************************************//**
+Determine the file format of a table from its flags (dict_tf_get_format).
+@return	file format version */
+UNIV_INLINE
+ulint
+dict_table_get_format(
+/*==================*/
+	const dict_table_t*	table)	/*!< in: table; must not be NULL */
+{
+	ut_ad(table);
+
+	return(dict_tf_get_format(table->flags));
+}
+
+/********************************************************************//**
+Set the row format, zip shift size and DATA DIRECTORY bit in a
+dict_table_t::flags.  If zip size is not needed, it should be 0. */
+UNIV_INLINE
+void
+dict_tf_set(
+/*========*/
+	ulint*		flags,		/*!< in/out: table flags */
+	rec_format_t	format,		/*!< in: row format */
+	ulint		zip_ssize,	/*!< in: zip shift size */
+	bool		use_data_dir)	/*!< in: table uses DATA DIRECTORY */
+{
+	/* Each case overwrites *flags completely. */
+	switch (format) {
+	case REC_FORMAT_REDUNDANT:
+		*flags = 0;
+		ut_ad(zip_ssize == 0);
+		break;
+	case REC_FORMAT_COMPACT:
+		*flags = DICT_TF_COMPACT;
+		ut_ad(zip_ssize == 0);
+		break;
+	case REC_FORMAT_COMPRESSED:
+		*flags = DICT_TF_COMPACT
+			| (1 << DICT_TF_POS_ATOMIC_BLOBS)
+			| (zip_ssize << DICT_TF_POS_ZIP_SSIZE);
+		break;
+	case REC_FORMAT_DYNAMIC:
+		*flags = DICT_TF_COMPACT
+			| (1 << DICT_TF_POS_ATOMIC_BLOBS);
+		ut_ad(zip_ssize == 0);
+		break;
+	}
+	if (use_data_dir) {
+		*flags |= (1 << DICT_TF_POS_DATA_DIR);
+	}
+}
+
+/********************************************************************//**
+Convert a 32 bit integer table flags to the 32 bit integer that is
+written into the tablespace header at the offset FSP_SPACE_FLAGS and is
+also stored in the fil_space_t::flags field.  The following chart shows
+the translation of the low order bit.  Other bits are the same.
+========================= Low order bit ==========================
+                    | REDUNDANT | COMPACT | COMPRESSED | DYNAMIC
+dict_table_t::flags |     0     |    1    |     1      |    1
+fil_space_t::flags  |     0     |    0    |     1      |    1
+==================================================================
+@return	tablespace flags (fil_space_t::flags) */
+UNIV_INLINE
+ulint
+dict_tf_to_fsp_flags(
+/*=================*/
+	ulint	table_flags)	/*!< in: dict_table_t::flags */
+{
+	ulint fsp_flags;
+
+	/* Debug injection point that returns ULINT_UNDEFINED. */
+	DBUG_EXECUTE_IF("dict_tf_to_fsp_flags_failure",
+			return(ULINT_UNDEFINED););
+	/* Adjust bit zero: it is set only together with atomic BLOBs. */
+	fsp_flags = DICT_TF_HAS_ATOMIC_BLOBS(table_flags) ? 1 : 0;
+
+	/* ZIP_SSIZE and ATOMIC_BLOBS are at the same position. */
+	fsp_flags |= table_flags & DICT_TF_MASK_ZIP_SSIZE;
+	fsp_flags |= table_flags & DICT_TF_MASK_ATOMIC_BLOBS;
+
+	/* In addition, tablespace flags also contain the page size. */
+	fsp_flags |= fsp_flags_set_page_size(fsp_flags, UNIV_PAGE_SIZE);
+
+	/* The DATA_DIR flag is in a different position in fsp_flags */
+	fsp_flags |= DICT_TF_HAS_DATA_DIR(table_flags)
+		     ? FSP_FLAGS_MASK_DATA_DIR : 0;
+
+	ut_a(fsp_flags_is_valid(fsp_flags));
+
+	return(fsp_flags);
+}
+
+/********************************************************************//**
+Convert a 32 bit integer from SYS_TABLES.TYPE to dict_table_t::flags
+The following chart shows the translation of the low order bit.
+Other bits are the same.
+========================= Low order bit ==========================
+                    | REDUNDANT | COMPACT | COMPRESSED and DYNAMIC
+SYS_TABLES.TYPE     |     1     |    1    |     1
+dict_table_t::flags |     0     |    1    |     1
+==================================================================
+@return	ulint containing dict_table_t::flags */
+UNIV_INLINE
+ulint
+dict_sys_tables_type_to_tf(
+/*=======================*/
+	ulint	type,	/*!< in: SYS_TABLES.TYPE field */
+	ulint	n_cols)	/*!< in: SYS_TABLES.N_COLS field */
+{
+	ulint	flags;
+	ulint	redundant = !(n_cols & DICT_N_COLS_COMPACT);
+
+	/* Adjust bit zero: it is taken from N_COLS, not from TYPE. */
+	flags = redundant ? 0 : 1;
+
+	/* ZIP_SSIZE, ATOMIC_BLOBS & DATA_DIR are the same. */
+	flags |= type & (DICT_TF_MASK_ZIP_SSIZE
+			 | DICT_TF_MASK_ATOMIC_BLOBS
+			 | DICT_TF_MASK_DATA_DIR);
+
+	return(flags);
+}
+
+/********************************************************************//**
+Convert a 32 bit integer table flags to the 32 bit integer that is written
+to a SYS_TABLES.TYPE field. The following chart shows the translation of
+the low order bit.  Other bits are the same.
+========================= Low order bit ==========================
+                    | REDUNDANT | COMPACT | COMPRESSED and DYNAMIC
+dict_table_t::flags |     0     |    1    |     1
+SYS_TABLES.TYPE     |     1     |    1    |     1
+==================================================================
+@return	ulint containing SYS_TABLES.TYPE */
+UNIV_INLINE
+ulint
+dict_tf_to_sys_tables_type(
+/*=======================*/
+	ulint	flags)	/*!< in: dict_table_t::flags; must be valid */
+{
+	ulint type;
+
+	ut_a(dict_tf_is_valid(flags));
+
+	/* Adjust bit zero. It is always 1 in SYS_TABLES.TYPE */
+	type = 1;
+
+	/* ZIP_SSIZE, ATOMIC_BLOBS & DATA_DIR are the same. */
+	type |= flags & (DICT_TF_MASK_ZIP_SSIZE
+			 | DICT_TF_MASK_ATOMIC_BLOBS
+			 | DICT_TF_MASK_DATA_DIR);
+
+	return(type);
+}
+
+/********************************************************************//**
+Extract the compressed page size from dict_table_t::flags.
+A nonzero zip shift size s encodes (UNIV_ZIP_SIZE_MIN >> 1) << s bytes.
+@return	compressed page size, or 0 if not compressed */
+UNIV_INLINE
+ulint
+dict_tf_get_zip_size(
+/*=================*/
+	ulint	flags)	/*!< in: flags */
+{
+	const ulint	shift = DICT_TF_GET_ZIP_SSIZE(flags);
+	ulint		bytes = 0;
+
+	if (shift != 0) {
+		bytes = (UNIV_ZIP_SIZE_MIN >> 1) << shift;
+	}
+	ut_ad(bytes <= UNIV_ZIP_SIZE_MAX);
+	return(bytes);
+}
+
+/********************************************************************//**
+Get the compressed page size of a table, derived from table->flags.
+@return	compressed page size, or 0 if not compressed */
+UNIV_INLINE
+ulint
+dict_table_zip_size(
+/*================*/
+	const dict_table_t*	table)	/*!< in: table; must not be NULL */
+{
+	ut_ad(table);
+
+	return(dict_tf_get_zip_size(table->flags));
+}
+
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+Obtain exclusive locks on all index trees of the table. This is to prevent
+accessing index trees while InnoDB is updating internal metadata for
+operations such as truncate table. The caller must own dict_sys->mutex. */
+UNIV_INLINE
+void
+dict_table_x_lock_indexes(
+/*======================*/
+	dict_table_t*	table)	/*!< in: table */
+{
+	dict_index_t*   index;
+
+	ut_a(table);
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+
+	/* Loop through the indexes of the table and X-lock each one */
+	for (index = dict_table_get_first_index(table);
+	     index != NULL;
+	     index = dict_table_get_next_index(index)) {
+		rw_lock_x_lock(dict_index_get_lock(index));
+	}
+}
+
+/*********************************************************************//**
+Release the exclusive locks on all index trees of the table. */
+UNIV_INLINE
+void
+dict_table_x_unlock_indexes(
+/*========================*/
+	dict_table_t*	table)	/*!< in: table */
+{
+	dict_index_t*   index;
+
+	ut_a(table);
+	ut_ad(mutex_own(&(dict_sys->mutex)));	/* caller owns the mutex */
+
+	for (index = dict_table_get_first_index(table);
+	     index != NULL;
+	     index = dict_table_get_next_index(index)) {
+		rw_lock_x_unlock(dict_index_get_lock(index));
+	}
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/********************************************************************//**
+Gets the number of fields in the internal representation of an index,
+including fields added by the dictionary system.
+@return	number of fields (index->n_fields) */
+UNIV_INLINE
+ulint
+dict_index_get_n_fields(
+/*====================*/
+	const dict_index_t*	index)	/*!< in: an internal
+					representation of index (in
+					the dictionary cache) */
+{
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	return(index->n_fields);
+}
+
+/********************************************************************//**
+Gets the number of fields in the internal representation of an index
+that uniquely determine the position of an index entry in the index, if
+we do not take multiversioning into account: in the B-tree use the value
+returned by dict_index_get_n_unique_in_tree.
+@return	number of fields (index->n_uniq) */
+UNIV_INLINE
+ulint
+dict_index_get_n_unique(
+/*====================*/
+	const dict_index_t*	index)	/*!< in: an internal representation
+					of index (in the dictionary cache) */
+{
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+	ut_ad(index->cached);
+
+	return(index->n_uniq);
+}
+
+/********************************************************************//**
+Gets the number of fields of the internal representation of an index
+that uniquely determine where an index entry is positioned in the B-tree,
+multiversioning included.
+@return	number of fields */
+UNIV_INLINE
+ulint
+dict_index_get_n_unique_in_tree(
+/*============================*/
+	const dict_index_t*	index)	/*!< in: an internal representation
+					of index (in the dictionary cache) */
+{
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+	ut_ad(index->cached);
+
+	/* For a clustered index the unique fields alone determine
+	the position; for any other index all fields of the internal
+	representation are needed. */
+	return(dict_index_is_clust(index)
+	       ? dict_index_get_n_unique(index)
+	       : dict_index_get_n_fields(index));
+}
+
+/********************************************************************//**
+Gets the number of user-defined ordering fields in the index. In the internal
+representation of clustered indexes we add the row id to the ordering fields
+to make a clustered index unique, but this function returns the number of
+fields the user defined in the index as ordering fields.
+@return	number of fields (index->n_user_defined_cols) */
+UNIV_INLINE
+ulint
+dict_index_get_n_ordering_defined_by_user(
+/*======================================*/
+	const dict_index_t*	index)	/*!< in: an internal representation
+					of index (in the dictionary cache) */
+{
+	return(index->n_user_defined_cols);
+}
+
+#ifdef UNIV_DEBUG
+/********************************************************************//**
+Gets the nth field of an index (function form, under #ifdef UNIV_DEBUG).
+@return	pointer to field object, index->fields + pos */
+UNIV_INLINE
+dict_field_t*
+dict_index_get_nth_field(
+/*=====================*/
+	const dict_index_t*	index,	/*!< in: index */
+	ulint			pos)	/*!< in: position of field;
+					must be less than index->n_def */
+{
+	ut_ad(index);
+	ut_ad(pos < index->n_def);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+	return((dict_field_t*) (index->fields) + pos);
+}
+#endif /* UNIV_DEBUG */
+
+/********************************************************************//**
+Finds where a system column is located among the fields of an index.
+@return	position, ULINT_UNDEFINED if not contained */
+UNIV_INLINE
+ulint
+dict_index_get_sys_col_pos(
+/*=======================*/
+	const dict_index_t*	index,	/*!< in: index */
+	ulint			type)	/*!< in: DATA_ROW_ID, ... */
+{
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+	ut_ad(!dict_index_is_univ(index));
+
+	if (!dict_index_is_clust(index)) {
+		/* Look the column up by its number in the table. */
+		const ulint	col_no
+			= dict_table_get_sys_col_no(index->table, type);
+		return(dict_index_get_nth_col_pos(index, col_no));
+	}
+
+	return(dict_col_get_clust_pos(
+		       dict_table_get_sys_col(index->table, type), index));
+}
+
+/*********************************************************************//**
+Gets the table column that an index field refers to.
+@return	field->col, pointer to the table column */
+UNIV_INLINE
+const dict_col_t*
+dict_field_get_col(
+/*===============*/
+	const dict_field_t*	field)	/*!< in: index field; not NULL */
+{
+	ut_ad(field);
+
+	return(field->col);
+}
+
+/********************************************************************//**
+Gets pointer to the column of the nth field in an index.
+@return	column */
+UNIV_INLINE
+const dict_col_t*
+dict_index_get_nth_col(
+/*===================*/
+	const dict_index_t*	index,	/*!< in: index */
+	ulint			pos)	/*!< in: position of the field */
+{
+	return(dict_field_get_col(dict_index_get_nth_field(index, pos)));
+}
+
+/********************************************************************//**
+Gets the column number of the nth field in an index.
+@return	column number */
+UNIV_INLINE
+ulint
+dict_index_get_nth_col_no(
+/*======================*/
+	const dict_index_t*	index,	/*!< in: index */
+	ulint			pos)	/*!< in: position of the field */
+{
+	return(dict_col_get_no(dict_index_get_nth_col(index, pos)));
+}
+
+/********************************************************************//**
+Looks for column n in an index; the prefix argument is passed as FALSE.
+@return position in internal representation of the index;
+ULINT_UNDEFINED if not contained */
+UNIV_INLINE
+ulint
+dict_index_get_nth_col_pos(
+/*=======================*/
+	const dict_index_t*	index,	/*!< in: index */
+	ulint			n)	/*!< in: column number */
+{
+	return(dict_index_get_nth_col_or_prefix_pos(index, n, FALSE));
+}
+
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+Returns the minimum data size of an index record, i.e. the sum of
+dict_col_get_min_size() over the columns of all fields of the index.
+@return	minimum data size in bytes */
+UNIV_INLINE
+ulint
+dict_index_get_min_size(
+/*====================*/
+	const dict_index_t*	index)	/*!< in: index */
+{
+	ulint	total = 0;
+
+	for (ulint pos = dict_index_get_n_fields(index); pos > 0; pos--) {
+		const dict_col_t*	col
+			= dict_index_get_nth_col(index, pos - 1);
+		total += dict_col_get_min_size(col);
+	}
+	return(total);
+}
+
+/*********************************************************************//**
+Gets the space id of the root of the index tree.
+@return	space id (index->space) */
+UNIV_INLINE
+ulint
+dict_index_get_space(
+/*=================*/
+	const dict_index_t*	index)	/*!< in: index; must not be NULL */
+{
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	return(index->space);
+}
+
+/*********************************************************************//**
+Sets the space id of the root of the index tree (index->space). */
+UNIV_INLINE
+void
+dict_index_set_space(
+/*=================*/
+	dict_index_t*	index,	/*!< in/out: index; must not be NULL */
+	ulint		space)	/*!< in: space id */
+{
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	index->space = space;
+}
+
+/*********************************************************************//**
+Gets the page number of the root of the index tree.
+@return	page number (index->page) */
+UNIV_INLINE
+ulint
+dict_index_get_page(
+/*================*/
+	const dict_index_t*	index)	/*!< in: index; must not be NULL */
+{
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	return(index->page);
+}
+
+/*********************************************************************//**
+Gets the read-write lock of the index tree.
+@return	read-write lock (address of index->lock) */
+UNIV_INLINE
+rw_lock_t*
+dict_index_get_lock(
+/*================*/
+	dict_index_t*	index)	/*!< in: index; must not be NULL */
+{
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	return(&(index->lock));
+}
+
+/********************************************************************//**
+Returns free space reserved for future updates of records. This is
+relevant only in the case of many consecutive inserts, as updates
+which make the records bigger might fragment the index.
+@return	number of free bytes on page, reserved for updates */
+UNIV_INLINE
+ulint
+dict_index_get_space_reserve(void)
+/*==============================*/
+{
+	return(UNIV_PAGE_SIZE / 16);	/* one sixteenth of the page size */
+}
+
+/********************************************************************//**
+Gets the status of online index creation (index->online_status).
+@return the status; under UNIV_DEBUG an unknown value triggers ut_error */
+UNIV_INLINE
+enum online_index_status
+dict_index_get_online_status(
+/*=========================*/
+	const dict_index_t*	index)	/*!< in: secondary index */
+{
+	enum online_index_status	status;
+
+	status = (enum online_index_status) index->online_status;
+
+	/* Without the index->lock protection, the online
+	status can change from ONLINE_INDEX_CREATION to
+	ONLINE_INDEX_COMPLETE (or ONLINE_INDEX_ABORTED) in
+	row_log_apply() once log application is done. So to make
+	sure the status is ONLINE_INDEX_CREATION or ONLINE_INDEX_COMPLETE
+	you should always do the recheck after acquiring index->lock. */
+
+#ifdef UNIV_DEBUG
+	switch (status) {
+	case ONLINE_INDEX_COMPLETE:
+	case ONLINE_INDEX_CREATION:
+	case ONLINE_INDEX_ABORTED:
+	case ONLINE_INDEX_ABORTED_DROPPED:
+		return(status);
+	}
+	ut_error;
+#endif /* UNIV_DEBUG */
+	return(status);
+}
+
+/********************************************************************//**
+Sets the status of online index creation (index->online_status). */
+UNIV_INLINE
+void
+dict_index_set_online_status(
+/*=========================*/
+	dict_index_t*			index,	/*!< in/out: index */
+	enum online_index_status	status)	/*!< in: status */
+{
+	ut_ad(!(index->type & DICT_FTS));
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+#ifdef UNIV_DEBUG
+	switch (dict_index_get_online_status(index)) {
+	case ONLINE_INDEX_COMPLETE:
+	case ONLINE_INDEX_CREATION:
+		break;
+	case ONLINE_INDEX_ABORTED:
+		/* The only change accepted from here is to "dropped". */
+		ut_ad(status == ONLINE_INDEX_ABORTED_DROPPED);
+		break;
+	case ONLINE_INDEX_ABORTED_DROPPED:
+		ut_error;	/* no change is accepted from this status */
+	}
+#endif /* UNIV_DEBUG */
+	index->online_status = status;
+	ut_ad(dict_index_get_online_status(index) == status);
+}
+
+/********************************************************************//**
+Determines if a secondary index is being created online, or if the table
+is being rebuilt online, allowing concurrent modifications to the table.
+The decision is made from dict_index_get_online_status() alone.
+@retval true if the online status is anything but ONLINE_INDEX_COMPLETE
+(for a clustered index, debug builds only expect ONLINE_INDEX_CREATION)
+@retval false if the online status is ONLINE_INDEX_COMPLETE, that is, the
+index has been created or the table has been rebuilt completely */
+UNIV_INLINE
+bool
+dict_index_is_online_ddl(
+/*=====================*/
+	const dict_index_t*	index)	/*!< in: index */
+{
+#ifdef UNIV_DEBUG
+	if (dict_index_is_clust(index)) {
+		switch (dict_index_get_online_status(index)) {
+		case ONLINE_INDEX_CREATION:
+			return(true);
+		case ONLINE_INDEX_COMPLETE:
+			return(false);
+		case ONLINE_INDEX_ABORTED:
+		case ONLINE_INDEX_ABORTED_DROPPED:
+			break;
+		}
+		ut_ad(0);
+		return(false);
+	}
+#endif /* UNIV_DEBUG */
+
+	return(UNIV_UNLIKELY(dict_index_get_online_status(index)
+			     != ONLINE_INDEX_COMPLETE));
+}
+
+/**********************************************************************//**
+Look for the first FTS index in the vector that contains the given column.
+@return ULINT_UNDEFINED if no match else the offset within the vector */
+UNIV_INLINE
+ulint
+dict_table_is_fts_column(
+/*=====================*/
+	ib_vector_t*	indexes,/*!< in: vector containing only FTS indexes */
+	ulint		col_no)	/*!< in: col number to search for */
+
+{
+	ulint	pos = 0;
+
+	/* ib_vector_size() is evaluated on every pass. */
+	while (pos < ib_vector_size(indexes)) {
+		dict_index_t*	fts_index
+			= (dict_index_t*) ib_vector_getp(indexes, pos);
+
+		if (dict_index_contains_col_or_prefix(fts_index, col_no)) {
+			return(pos);
+		}
+		++pos;
+	}
+
+	return(ULINT_UNDEFINED);
+}
+
+/**********************************************************************//**
+Determine how many bytes of a column prefix go into the undo log.
+Tables whose format is below UNIV_FORMAT_B store no prefix at all;
+otherwise col->max_prefix is used, or the format's maximum if it is 0.
+@return bytes of column prefix to be stored in the undo log */
+UNIV_INLINE
+ulint
+dict_max_field_len_store_undo(
+/*==========================*/
+	dict_table_t*		table,	/*!< in: table */
+	const dict_col_t*	col)	/*!< in: column which index prefix
+					is based on */
+{
+	if (dict_table_get_format(table) < UNIV_FORMAT_B) {
+		/* Formats below UNIV_FORMAT_B keep no column prefix
+		in the undo log. */
+		return(0);
+	}
+
+	if (col->max_prefix) {
+		return(col->max_prefix);
+	}
+	return(DICT_MAX_FIELD_LEN_BY_FORMAT(table));
+}
+
+/********************************************************************//**
+Check whether the table is corrupted (table->corrupted).
+@return	nonzero for corrupted table, zero for valid tables */
+UNIV_INLINE
+ulint
+dict_table_is_corrupted(
+/*====================*/
+	const dict_table_t*	table)	/*!< in: table; must not be NULL */
+{
+	ut_ad(table);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+	return(table->corrupted);
+}
+
+/********************************************************************//**
+Check whether the index is corrupted, or belongs to a corrupted table.
+@return	nonzero for corrupted index, zero for valid indexes */
+UNIV_INLINE
+ulint
+dict_index_is_corrupted(
+/*====================*/
+	const dict_index_t*	index)	/*!< in: index; must not be NULL */
+{
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	return((index->type & DICT_CORRUPT)
+	       || (index->table && index->table->corrupted));
+}
+
+/********************************************************************//**
+Check if the tablespace for the table has been discarded.
+@return	true if the DICT_TF2_DISCARDED flag is set on the table. */
+UNIV_INLINE
+bool
+dict_table_is_discarded(
+/*====================*/
+	const dict_table_t*	table)	/*!< in: table to check */
+{
+	return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_DISCARDED));
+}
+
+/********************************************************************//**
+Check if it is a temporary table.
+@return	true if the DICT_TF2_TEMPORARY flag is set on the table. */
+UNIV_INLINE
+bool
+dict_table_is_temporary(
+/*====================*/
+	const dict_table_t*	table)	/*!< in: table to check */
+{
+	return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY));
+}
+
+/**********************************************************************//**
+Find the first index of the table whose first field is the given column.
+If no index of the table qualifies, ut_error is raised.
+@return index whose first field is the column at position col_index */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_index_on_first_col(
+/*==============================*/
+	const dict_table_t*	table,		/*!< in: table */
+	ulint			col_index)	/*!< in: position of column
+						in table */
+{
+	ut_ad(col_index < table->n_cols);
+
+	dict_col_t* column = dict_table_get_nth_col(table, col_index);
+
+	for (dict_index_t* index = dict_table_get_first_index(table);
+		index != NULL; index = dict_table_get_next_index(index)) {
+
+		if (index->fields[0].col == column) {
+			return(index);
+		}
+	}
+	ut_error;
+	return(0);	/* presumably not reached after ut_error */
+}
+
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/dict0load.h b/storage/innobase/include/dict0load.h
new file mode 100644
index 00000000000..030190b1a8e
--- /dev/null
+++ b/storage/innobase/include/dict0load.h
@@ -0,0 +1,428 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0load.h
+Loads to the memory cache database object definitions
+from dictionary tables
+
+Created 4/24/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dict0load_h
+#define dict0load_h
+
+#include "univ.i"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "ut0byte.h"
+#include "mem0mem.h"
+#include "btr0types.h"
+
+/** IDs of the system tables, numbered from 0. @see SYSTEM_TABLE_NAME[] */
+enum dict_system_id_t {
+	SYS_TABLES = 0,
+	SYS_INDEXES,
+	SYS_COLUMNS,
+	SYS_FIELDS,
+	SYS_FOREIGN,
+	SYS_FOREIGN_COLS,
+	SYS_TABLESPACES,
+	SYS_DATAFILES,
+
+	/* Must stay last: its value is the number of system tables. */
+	SYS_NUM_SYSTEM_TABLES
+};
+
+/** Load mode ("status") for dict_process_sys_tables_rec_and_mtr_commit() */
+enum dict_table_info_t {
+	DICT_TABLE_LOAD_FROM_RECORD = 0,/*!< Directly populate a dict_table_t
+					structure with information from
+					a SYS_TABLES record */
+	DICT_TABLE_LOAD_FROM_CACHE = 1	/*!< Check first whether dict_table_t
+					is in the cache; if so, return it */
+};
+
+/** Check mode passed to dict_check_tablespaces_and_store_max_id() */
+enum dict_check_t {
+	/** No user tablespaces have been opened
+	(no crash recovery, no transactions recovered). */
+	DICT_CHECK_NONE_LOADED = 0,
+	/** Some user tablespaces may have been opened
+	(no crash recovery; recovered table locks for transactions). */
+	DICT_CHECK_SOME_LOADED,
+	/** All user tablespaces have been opened (crash recovery). */
+	DICT_CHECK_ALL_LOADED
+};
+
+/********************************************************************//**
+In a crash recovery we already have all the tablespace objects created.
+This function compares the space id information in the InnoDB data dictionary
+to what we already read with fil_load_single_table_tablespaces().
+
+In a normal startup, we create the tablespace objects for every table in
+InnoDB's data dictionary, if the corresponding .ibd file exists.
+We also scan the biggest space id, and store it to fil_system. */
+UNIV_INTERN
+void
+dict_check_tablespaces_and_store_max_id(
+/*====================================*/
+	dict_check_t	dict_check);	/*!< in: how to check */
+/********************************************************************//**
+Finds the first table name in the given database.
+@return own: table name, NULL if none exists; the caller must free
+the memory in the string! */
+UNIV_INTERN
+char*
+dict_get_first_table_name_in_db(
+/*============================*/
+	const char*	name);	/*!< in: database name which ends in '/' */
+
+/********************************************************************//**
+Loads a table definition from a SYS_TABLES record to dict_table_t.
+Does not load any columns or indexes.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_load_table_low(
+/*================*/
+	const char*	name,		/*!< in: table name */
+	const rec_t*	rec,		/*!< in: SYS_TABLES record */
+	dict_table_t**	table);		/*!< out,own: table, or NULL */
+/********************************************************************//**
+Loads a table column definition from a SYS_COLUMNS record to
+dict_table_t.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_load_column_low(
+/*=================*/
+	dict_table_t*	table,		/*!< in/out: table, could be NULL
+					if we just populate a dict_col_t
+					struct with information from
+					a SYS_COLUMNS record */
+	mem_heap_t*	heap,		/*!< in/out: memory heap
+					for temporary storage */
+	dict_col_t*	column,		/*!< out: dict_col_t to fill,
+					or NULL if table != NULL */
+	table_id_t*	table_id,	/*!< out: table id */
+	const char**	col_name,	/*!< out: column name */
+	const rec_t*	rec);		/*!< in: SYS_COLUMNS record */
+/********************************************************************//**
+Loads an index definition from a SYS_INDEXES record to dict_index_t.
+If allocate=TRUE, we will create a dict_index_t structure and fill it
+accordingly. If allocate=FALSE, the dict_index_t will be supplied by
+the caller and filled with information read from the record.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_load_index_low(
+/*================*/
+	byte*		table_id,	/*!< in/out: table id (8 bytes),
+					an "in" value if allocate=TRUE
+					and "out" when allocate=FALSE */
+	const char*	table_name,	/*!< in: table name */
+	mem_heap_t*	heap,		/*!< in/out: temporary memory heap */
+	const rec_t*	rec,		/*!< in: SYS_INDEXES record */
+	ibool		allocate,	/*!< in: TRUE=allocate *index,
+					FALSE=fill in a pre-allocated
+					*index */
+	dict_index_t**	index);		/*!< out,own: index, or NULL */
+/********************************************************************//**
+Loads an index field definition from a SYS_FIELDS record to
+dict_index_t.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_load_field_low(
+/*================*/
+	byte*		index_id,	/*!< in/out: index id (8 bytes)
+					an "in" value if index != NULL
+					and "out" if index == NULL */
+	dict_index_t*	index,		/*!< in/out: index, could be NULL
+					if we just populate a dict_field_t
+					struct with information from
+					a SYS_FIELDS record */
+	dict_field_t*	sys_field,	/*!< out: dict_field_t to be
+					filled */
+	ulint*		pos,		/*!< out: Field position */
+	byte*		last_index_id,	/*!< in: last index id */
+	mem_heap_t*	heap,		/*!< in/out: memory heap
+					for temporary storage */
+	const rec_t*	rec);		/*!< in: SYS_FIELDS record */
+/********************************************************************//**
+Using the table->heap, copy the null-terminated filepath into
+table->data_dir_path and put a null byte before the extension.
+This allows SHOW CREATE TABLE to return the correct DATA DIRECTORY path.
+Make this data directory path only if it has not yet been saved. */
+UNIV_INTERN
+void
+dict_save_data_dir_path(
+/*====================*/
+	dict_table_t*	table,		/*!< in/out: table */
+	char*		filepath);	/*!< in: filepath of tablespace */
+/*****************************************************************//**
+Make sure the data_file_name is saved in dict_table_t if needed. Try to
+read it from the file dictionary first, then from SYS_DATAFILES. */
+UNIV_INTERN
+void
+dict_get_and_save_data_dir_path(
+/*============================*/
+	dict_table_t*	table,		/*!< in/out: table */
+	bool		dict_mutex_own);	/*!< in: true if dict_sys->mutex
+					is owned already */
+/********************************************************************//**
+Loads a table definition and also all its index definitions, and also
+the cluster definition if the table is a member in a cluster. Also loads
+all foreign key constraints where the foreign key is in the table or where
+a foreign key references columns in this table.
+@return table, NULL if does not exist; if the table is stored in an
+.ibd file, but the file does not exist, then we set the
+ibd_file_missing flag TRUE in the table object we return */
+UNIV_INTERN
+dict_table_t*
+dict_load_table(
+/*============*/
+	const char*	name,	/*!< in: table name in the
+				databasename/tablename format */
+	ibool		cached,	/*!< in: TRUE=add to cache, FALSE=do not */
+	dict_err_ignore_t ignore_err);
+				/*!< in: error to be ignored when loading
+				table and its indexes' definition */
+/***********************************************************************//**
+Loads a table object based on the table id.
+@return	table; NULL if table does not exist */
+UNIV_INTERN
+dict_table_t*
+dict_load_table_on_id(
+/*==================*/
+	table_id_t		table_id,	/*!< in: table id */
+	dict_err_ignore_t	ignore_err);	/*!< in: errors to ignore
+						when loading the table */
+/********************************************************************//**
+This function is called when the database is booted.
+Loads system table index definitions except for the clustered index which
+is added to the dictionary cache at booting before calling this function. */
+UNIV_INTERN
+void
+dict_load_sys_table(
+/*================*/
+	dict_table_t*	table);	/*!< in: system table */
+/***********************************************************************//**
+Loads foreign key constraints where the table is either the foreign key
+holder or where the table is referenced by a foreign key. Adds these
+constraints to the data dictionary. Note that we know that the dictionary
+cache already contains all constraints where the other relevant table is
+already in the dictionary cache.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+dict_load_foreigns(
+/*===============*/
+	const char*		table_name,	/*!< in: table name */
+	const char**		col_names,	/*!< in: column names, or NULL
+						to use table->col_names */
+	bool			check_recursive,/*!< in: Whether to check
+						recursive load of tables
+						chained by FK */
+	bool			check_charsets,	/*!< in: whether to check
+						charset compatibility */
+	dict_err_ignore_t	ignore_err)	/*!< in: error to be ignored */
+	__attribute__((nonnull(1), warn_unused_result));
+/********************************************************************//**
+Prints to the standard output information on all tables found in the data
+dictionary system table. */
+UNIV_INTERN
+void
+dict_print(void);
+/*============*/
+
+/********************************************************************//**
+This function opens a system table and returns its first record.
+@return	first record of the system table */
+UNIV_INTERN
+const rec_t*
+dict_startscan_system(
+/*==================*/
+	btr_pcur_t*	pcur,		/*!< out: persistent cursor to
+					the record */
+	mtr_t*		mtr,		/*!< in: the mini-transaction */
+	dict_system_id_t system_id);	/*!< in: which system table to open */
+/********************************************************************//**
+This function gets the next system table record as we scan the table.
+@return	the record if found, NULL if end of scan. */
+UNIV_INTERN
+const rec_t*
+dict_getnext_system(
+/*================*/
+	btr_pcur_t*	pcur,		/*!< in/out: persistent cursor
+					to the record */
+	mtr_t*		mtr);		/*!< in: the mini-transaction */
+/********************************************************************//**
+This function processes one SYS_TABLES record and populates the dict_table_t
+struct for the table. Extracted out of dict_print() to be used by
+both monitor table output and information schema innodb_sys_tables output.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_tables_rec_and_mtr_commit(
+/*=======================================*/
+	mem_heap_t*	heap,		/*!< in: temporary memory heap */
+	const rec_t*	rec,		/*!< in: SYS_TABLES record */
+	dict_table_t**	table,		/*!< out: dict_table_t to fill */
+	dict_table_info_t status,	/*!< in: status bit controls
+					options such as whether we shall
+					look for dict_table_t from cache
+					first */
+	mtr_t*		mtr);		/*!< in/out: mini-transaction,
+					will be committed */
+/********************************************************************//**
+This function parses a SYS_INDEXES record and populates a dict_index_t
+structure with the information from the record. For detailed information
+about SYS_INDEXES fields, please refer to the dict_boot() function.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_indexes_rec(
+/*=========================*/
+	mem_heap_t*	heap,		/*!< in/out: heap memory */
+	const rec_t*	rec,		/*!< in: current SYS_INDEXES rec */
+	dict_index_t*	index,		/*!< out: dict_index_t to be
+					filled */
+	table_id_t*	table_id);	/*!< out: table id */
+/********************************************************************//**
+This function parses a SYS_COLUMNS record and populates a dict_col_t
+structure with the information from the record.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_columns_rec(
+/*=========================*/
+	mem_heap_t*	heap,		/*!< in/out: heap memory */
+	const rec_t*	rec,		/*!< in: current SYS_COLUMNS rec */
+	dict_col_t*	column,		/*!< out: dict_col_t to be filled */
+	table_id_t*	table_id,	/*!< out: table id */
+	const char**	col_name);	/*!< out: column name */
+/********************************************************************//**
+This function parses a SYS_FIELDS record and populates a dict_field_t
+structure with the information from the record.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_fields_rec(
+/*========================*/
+	mem_heap_t*	heap,		/*!< in/out: heap memory */
+	const rec_t*	rec,		/*!< in: current SYS_FIELDS rec */
+	dict_field_t*	sys_field,	/*!< out: dict_field_t to be
+					filled */
+	ulint*		pos,		/*!< out: Field position */
+	index_id_t*	index_id,	/*!< out: current index id */
+	index_id_t	last_id);	/*!< in: previous index id */
+/********************************************************************//**
+This function parses a SYS_FOREIGN record and populates a dict_foreign_t
+structure with the information from the record. For detailed information
+about SYS_FOREIGN fields, please refer to the dict_load_foreign() function.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_foreign_rec(
+/*=========================*/
+	mem_heap_t*	heap,		/*!< in/out: heap memory */
+	const rec_t*	rec,		/*!< in: current SYS_FOREIGN rec */
+	dict_foreign_t*	foreign);	/*!< out: dict_foreign_t to be
+					filled */
+/********************************************************************//**
+This function parses a SYS_FOREIGN_COLS record, extracts the necessary
+information from the record and returns it to the caller.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_foreign_col_rec(
+/*=============================*/
+	mem_heap_t*	heap,		/*!< in/out: heap memory */
+	const rec_t*	rec,		/*!< in: current SYS_FOREIGN_COLS rec */
+	const char**	name,		/*!< out: foreign key constraint name */
+	const char**	for_col_name,	/*!< out: referencing column name */
+	const char**	ref_col_name,	/*!< out: referenced column name
+					in referenced table */
+	ulint*		pos);		/*!< out: column position */
+/********************************************************************//**
+This function parses a SYS_TABLESPACES record, extracts the necessary
+information from the record and returns it to the caller.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_tablespaces(
+/*=========================*/
+	mem_heap_t*	heap,		/*!< in/out: heap memory */
+	const rec_t*	rec,		/*!< in: current SYS_TABLESPACES rec */
+	ulint*		space,		/*!< out: space id */
+	const char**	name,		/*!< out: tablespace name */
+	ulint*		flags);		/*!< out: tablespace flags */
+/********************************************************************//**
+This function parses a SYS_DATAFILES record, extracts the necessary
+information from the record and returns it to the caller.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_datafiles(
+/*=======================*/
+	mem_heap_t*	heap,		/*!< in/out: heap memory */
+	const rec_t*	rec,		/*!< in: current SYS_DATAFILES rec */
+	ulint*		space,		/*!< out: space id */
+	const char**	path);		/*!< out: datafile path */
+/********************************************************************//**
+Get the filepath for a spaceid from SYS_DATAFILES. This function provides
+a temporary heap which is used for the table lookup, but not for the path.
+The caller must free the memory for the path returned. This function can
+return NULL if the space ID is not found in SYS_DATAFILES, then the caller
+will assume that the ibd file is in the normal datadir.
+@return	own: A copy of the first datafile found in SYS_DATAFILES.PATH for
+the given space ID. NULL if space ID is zero or not found. */
+UNIV_INTERN
+char*
+dict_get_first_path(
+/*================*/
+	ulint		space,	/*!< in: space id */
+	const char*	name);	/*!< in: tablespace name */
+/********************************************************************//**
+Update the record for space_id in SYS_TABLESPACES to this filepath.
+@return	DB_SUCCESS if OK, dberr_t if the insert failed */
+UNIV_INTERN
+dberr_t
+dict_update_filepath(
+/*=================*/
+	ulint		space_id,	/*!< in: space id */
+	const char*	filepath);	/*!< in: filepath */
+/********************************************************************//**
+Insert records into SYS_TABLESPACES and SYS_DATAFILES.
+@return	DB_SUCCESS if OK, dberr_t if the insert failed */
+UNIV_INTERN
+dberr_t
+dict_insert_tablespace_and_filepath(
+/*================================*/
+	ulint		space,		/*!< in: space id */
+	const char*	name,		/*!< in: tablespace name */
+	const char*	filepath,	/*!< in: filepath */
+	ulint		fsp_flags);	/*!< in: tablespace flags */
+
+#ifndef UNIV_NONINL
+#include "dict0load.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/dict0load.ic b/storage/innobase/include/dict0load.ic
new file mode 100644
index 00000000000..2c0f1ff38a5
--- /dev/null
+++ b/storage/innobase/include/dict0load.ic
@@ -0,0 +1,26 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0load.ic
+Loads to the memory cache database object definitions
+from dictionary tables
+
+Created 4/24/1996 Heikki Tuuri
+*******************************************************/
+
diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
new file mode 100644
index 00000000000..460a7e125ad
--- /dev/null
+++ b/storage/innobase/include/dict0mem.h
@@ -0,0 +1,1214 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0mem.h
+Data dictionary memory object creation
+
+Created 1/8/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dict0mem_h
+#define dict0mem_h
+
+#include "univ.i"
+#include "dict0types.h"
+#include "data0type.h"
+#include "mem0mem.h"
+#include "row0types.h"
+#include "rem0types.h"
+#include "btr0types.h"
+#ifndef UNIV_HOTBACKUP
+# include "lock0types.h"
+# include "que0types.h"
+# include "sync0rw.h"
+#endif /* !UNIV_HOTBACKUP */
+#include "ut0mem.h"
+#include "ut0lst.h"
+#include "ut0rnd.h"
+#include "ut0byte.h"
+#include "hash0hash.h"
+#include "trx0types.h"
+#include "fts0fts.h"
+#include "os0once.h"
+#include <set>
+#include <algorithm>
+#include <iterator>
+
+/* Forward declaration. */
+struct ib_rbt_t;
+
+/** Type flags of an index: OR'ing of the flags is allowed to define a
+combination of types */
+/* @{ */
+#define DICT_CLUSTERED	1	/*!< clustered index */
+#define DICT_UNIQUE	2	/*!< unique index */
+#define	DICT_UNIVERSAL	4	/*!< index which can contain records from any
+				other index */
+#define	DICT_IBUF	8	/*!< insert buffer tree */
+#define	DICT_CORRUPT	16	/*!< bit to store the corrupted flag
+				in SYS_INDEXES.TYPE */
+#define	DICT_FTS	32	/*!< FTS index; can't be combined with the
+				other flags */
+
+#define	DICT_IT_BITS	6	/*!< number of bits used for
+				SYS_INDEXES.TYPE */
+/* @} */
+
+#if 0 /* not implemented, retained for history */
+/** Types for a table object */
+#define DICT_TABLE_ORDINARY		1 /*!< ordinary table */
+#define	DICT_TABLE_CLUSTER_MEMBER	2
+#define	DICT_TABLE_CLUSTER		3 /* this means that the table is
+					  really a cluster definition */
+#endif
+
+/* Table and tablespace flags are generally not used for the Antelope file
+format except for the low order bit, which is used differently depending on
+where the flags are stored.
+
+==================== Low order flags bit =========================
+                    | REDUNDANT | COMPACT | COMPRESSED and DYNAMIC
+SYS_TABLES.TYPE     |     1     |    1    |     1
+dict_table_t::flags |     0     |    1    |     1
+FSP_SPACE_FLAGS     |     0     |    0    |     1
+fil_space_t::flags  |     0     |    0    |     1
+
+Before the 5.1 plugin, SYS_TABLES.TYPE was always DICT_TABLE_ORDINARY (1)
+and the tablespace flags field was always 0. In the 5.1 plugin, these fields
+were repurposed to identify compressed and dynamic row formats.
+
+The following types and constants describe the flags found in dict_table_t
+and SYS_TABLES.TYPE.  Similar flags found in fil_space_t and FSP_SPACE_FLAGS
+are described in fsp0fsp.h. */
+
+/* @{ */
+/** dict_table_t::flags bit 0 is equal to 0 if the row format = Redundant */
+#define DICT_TF_REDUNDANT		0	/*!< Redundant row format. */
+/** dict_table_t::flags bit 0 is equal to 1 if the row format = Compact */
+#define DICT_TF_COMPACT			1	/*!< Compact row format. */
+
+/** This bitmask is used in SYS_TABLES.N_COLS to set and test whether
+the Compact page format is used, i.e. ROW_FORMAT != REDUNDANT */
+#define DICT_N_COLS_COMPACT	0x80000000UL
+
+/** Width of the COMPACT flag */
+#define DICT_TF_WIDTH_COMPACT		1
+/** Width of the ZIP_SSIZE flag */
+#define DICT_TF_WIDTH_ZIP_SSIZE		4
+/** Width of the ATOMIC_BLOBS flag.  The Antelope file formats broke up
+BLOB and TEXT fields, storing the first 768 bytes in the clustered index.
+Barracuda row formats store the whole blob or text field off-page atomically.
+Secondary indexes are created from this external data using row_ext_t
+to cache the BLOB prefixes. */
+#define DICT_TF_WIDTH_ATOMIC_BLOBS	1
+/** If a table is created with the MYSQL option DATA DIRECTORY and
+innodb-file-per-table, an older engine will not be able to find that table.
+This flag prevents older engines from attempting to open the table and
+allows InnoDB to update_create_info() accordingly. */
+#define DICT_TF_WIDTH_DATA_DIR		1
+
+/** Width of all the currently known table flags */
+#define DICT_TF_BITS	(DICT_TF_WIDTH_COMPACT		\
+			+ DICT_TF_WIDTH_ZIP_SSIZE	\
+			+ DICT_TF_WIDTH_ATOMIC_BLOBS	\
+			+ DICT_TF_WIDTH_DATA_DIR)
+/** A mask of all the known/used bits in table flags. All masks below are
+built as (1 << width) - 1 so that no negative int is ever left-shifted. */
+#define DICT_TF_BIT_MASK	((1 << DICT_TF_BITS) - 1)
+
+/** Zero relative shift position of the COMPACT field */
+#define DICT_TF_POS_COMPACT		0
+/** Zero relative shift position of the ZIP_SSIZE field */
+#define DICT_TF_POS_ZIP_SSIZE		(DICT_TF_POS_COMPACT		\
+					+ DICT_TF_WIDTH_COMPACT)
+/** Zero relative shift position of the ATOMIC_BLOBS field */
+#define DICT_TF_POS_ATOMIC_BLOBS	(DICT_TF_POS_ZIP_SSIZE		\
+					+ DICT_TF_WIDTH_ZIP_SSIZE)
+/** Zero relative shift position of the DATA_DIR field */
+#define DICT_TF_POS_DATA_DIR		(DICT_TF_POS_ATOMIC_BLOBS	\
+					+ DICT_TF_WIDTH_ATOMIC_BLOBS)
+/** Zero relative shift position of the start of the UNUSED bits */
+#define DICT_TF_POS_UNUSED		(DICT_TF_POS_DATA_DIR		\
+					+ DICT_TF_WIDTH_DATA_DIR)
+
+/** Bit mask of the COMPACT field */
+#define DICT_TF_MASK_COMPACT				\
+		(((1 << DICT_TF_WIDTH_COMPACT) - 1)	\
+		<< DICT_TF_POS_COMPACT)
+/** Bit mask of the ZIP_SSIZE field */
+#define DICT_TF_MASK_ZIP_SSIZE				\
+		(((1 << DICT_TF_WIDTH_ZIP_SSIZE) - 1)	\
+		<< DICT_TF_POS_ZIP_SSIZE)
+/** Bit mask of the ATOMIC_BLOBS field */
+#define DICT_TF_MASK_ATOMIC_BLOBS			\
+		(((1 << DICT_TF_WIDTH_ATOMIC_BLOBS) - 1)	\
+		<< DICT_TF_POS_ATOMIC_BLOBS)
+/** Bit mask of the DATA_DIR field */
+#define DICT_TF_MASK_DATA_DIR				\
+		(((1 << DICT_TF_WIDTH_DATA_DIR) - 1)	\
+		<< DICT_TF_POS_DATA_DIR)
+
+/** Return the value of the COMPACT field; flags may be any expression */
+#define DICT_TF_GET_COMPACT(flags)			\
+		(((flags) & DICT_TF_MASK_COMPACT)	\
+		>> DICT_TF_POS_COMPACT)
+/** Return the value of the ZIP_SSIZE field */
+#define DICT_TF_GET_ZIP_SSIZE(flags)			\
+		(((flags) & DICT_TF_MASK_ZIP_SSIZE)	\
+		>> DICT_TF_POS_ZIP_SSIZE)
+/** Return the value of the ATOMIC_BLOBS field */
+#define DICT_TF_HAS_ATOMIC_BLOBS(flags)			\
+		(((flags) & DICT_TF_MASK_ATOMIC_BLOBS)	\
+		>> DICT_TF_POS_ATOMIC_BLOBS)
+/** Return the value of the DATA_DIR field */
+#define DICT_TF_HAS_DATA_DIR(flags)			\
+		(((flags) & DICT_TF_MASK_DATA_DIR)	\
+		>> DICT_TF_POS_DATA_DIR)
+/** Return the contents of the UNUSED bits */
+#define DICT_TF_GET_UNUSED(flags)			\
+		((flags) >> DICT_TF_POS_UNUSED)
+/* @} */
+
+/** @brief Table Flags set number 2.
+
+These flags will be stored in SYS_TABLES.MIX_LEN.  All unused flags
+will be written as 0.  The column may contain garbage for tables
+created with old versions of InnoDB that only implemented
+ROW_FORMAT=REDUNDANT.  InnoDB engines do not check these flags
+for unknown bits in order to protect backward compatibility. */
+/* @{ */
+/** Total number of bits in table->flags2. */
+#define DICT_TF2_BITS			7
+#define DICT_TF2_BIT_MASK		((1 << DICT_TF2_BITS) - 1)
+
+/** TEMPORARY; TRUE for tables from CREATE TEMPORARY TABLE. */
+#define DICT_TF2_TEMPORARY		1
+/** The table has an internal defined DOC ID column */
+#define DICT_TF2_FTS_HAS_DOC_ID		2
+/** The table has an FTS index */
+#define DICT_TF2_FTS			4
+/** Need to add Doc ID column for FTS index build.
+This is a transient bit for index build */
+#define DICT_TF2_FTS_ADD_DOC_ID		8
+/** This bit is used during table creation to indicate that it will
+use its own tablespace instead of the system tablespace. */
+#define DICT_TF2_USE_TABLESPACE		16
+
+/** Set when we discard/detach the tablespace */
+#define DICT_TF2_DISCARDED		32
+
+/** This bit is set if all aux table names (both common tables and
+index tables) of a FTS table are in HEX format. */
+#define DICT_TF2_FTS_AUX_HEX_NAME	64
+/* @} */
+/** Set the given bit(s) in table->flags2; table is a pointer expression */
+#define DICT_TF2_FLAG_SET(table, flag)				\
+	((table)->flags2 |= (flag))
+/** Return the given bit(s) of table->flags2 (non-zero if any is set) */
+#define DICT_TF2_FLAG_IS_SET(table, flag)			\
+	((table)->flags2 & (flag))
+/** Clear the given bit(s) in table->flags2 */
+#define DICT_TF2_FLAG_UNSET(table, flag)			\
+	((table)->flags2 &= ~(flag))
+
+/** Tables can be chained together by foreign key constraints. When the
+parent table is first loaded, all of its descendants are loaded as well.
+This could result in recursive calls and eventually an out-of-stack error.
+DICT_FK_MAX_RECURSIVE_LOAD defines the maximum number of recursive loads;
+when exceeded, the child table will not be loaded. It will be loaded when
+the foreign constraint check needs to be run. */
+#define DICT_FK_MAX_RECURSIVE_LOAD	20
+
+/** Similarly, when tables are chained together by foreign key constraints
+with a cascading delete/update clause, a delete from the parent table can
+result in recursive cascading calls. This defines the maximum number of
+such cascading deletes/updates allowed. When exceeded, the delete from the
+parent table will fail, and the user has to drop the excessive foreign
+key constraints before proceeding. */
+#define FK_MAX_CASCADE_DEL		255
+
+/**********************************************************************//**
+Creates a table memory object.
+@return	own: table object */
+UNIV_INTERN
+dict_table_t*
+dict_mem_table_create(
+/*==================*/
+	const char*	name,		/*!< in: table name */
+	ulint		space,		/*!< in: space where the clustered index
+					of the table is placed */
+	ulint		n_cols,		/*!< in: number of columns */
+	ulint		flags,		/*!< in: table flags */
+	ulint		flags2);	/*!< in: table flags2 */
+/****************************************************************//**
+Free a table memory object. */
+UNIV_INTERN
+void
+dict_mem_table_free(
+/*================*/
+	dict_table_t*	table);		/*!< in: table */
+/**********************************************************************//**
+Adds a column definition to a table. */
+UNIV_INTERN
+void
+dict_mem_table_add_col(
+/*===================*/
+	dict_table_t*	table,	/*!< in: table */
+	mem_heap_t*	heap,	/*!< in: temporary memory heap, or NULL */
+	const char*	name,	/*!< in: column name, or NULL */
+	ulint		mtype,	/*!< in: main datatype */
+	ulint		prtype,	/*!< in: precise type */
+	ulint		len)	/*!< in: precision */
+	__attribute__((nonnull(1)));
+/**********************************************************************//**
+Renames a column of a table in the data dictionary cache. */
+UNIV_INTERN
+void
+dict_mem_table_col_rename(
+/*======================*/
+	dict_table_t*	table,	/*!< in/out: table */
+	unsigned	nth_col,/*!< in: column index */
+	const char*	from,	/*!< in: old column name */
+	const char*	to)	/*!< in: new column name */
+	__attribute__((nonnull));
+/**********************************************************************//**
+This function populates a dict_col_t memory structure with
+supplied information. */
+UNIV_INTERN
+void
+dict_mem_fill_column_struct(
+/*========================*/
+	dict_col_t*	column,		/*!< out: column struct to be
+					filled */
+	ulint		col_pos,	/*!< in: column position */
+	ulint		mtype,		/*!< in: main data type */
+	ulint		prtype,		/*!< in: precise type */
+	ulint		col_len);	/*!< in: column length */
+/**********************************************************************//**
+This function populates a dict_index_t index memory structure with
+supplied information. */
+UNIV_INLINE
+void
+dict_mem_fill_index_struct(
+/*=======================*/
+	dict_index_t*	index,		/*!< out: index to be filled */
+	mem_heap_t*	heap,		/*!< in: memory heap */
+	const char*	table_name,	/*!< in: table name */
+	const char*	index_name,	/*!< in: index name */
+	ulint		space,		/*!< in: space where the index tree is
+					placed, ignored if the index is of
+					the clustered type */
+	ulint		type,		/*!< in: DICT_UNIQUE,
+					DICT_CLUSTERED, ... ORed */
+	ulint		n_fields);	/*!< in: number of fields */
+/**********************************************************************//**
+Creates an index memory object. @see dict_mem_index_free()
+@return	own: index object */
+UNIV_INTERN
+dict_index_t*
+dict_mem_index_create(
+/*==================*/
+	const char*	table_name,	/*!< in: table name */
+	const char*	index_name,	/*!< in: index name */
+	ulint		space,		/*!< in: space where the index tree is
+					placed, ignored if the index is of
+					the clustered type */
+	ulint		type,		/*!< in: DICT_UNIQUE,
+					DICT_CLUSTERED, ... ORed */
+	ulint		n_fields);	/*!< in: number of fields */
+/**********************************************************************//**
+Adds a field definition to an index. NOTE: does not take a copy
+of the column name if the field is a column. The memory occupied
+by the column name may be released only after publishing the index. */
+UNIV_INTERN
+void
+dict_mem_index_add_field(
+/*=====================*/
+	dict_index_t*	index,		/*!< in/out: index */
+	const char*	name,		/*!< in: column name (not copied) */
+	ulint		prefix_len);	/*!< in: 0 or the column prefix length
+					in a MySQL index like
+					INDEX (textcol(25)) */
+/**********************************************************************//**
+Frees an index memory object. */
+UNIV_INTERN
+void
+dict_mem_index_free(
+/*================*/
+	dict_index_t*	index);	/*!< in, own: index to free */
+/**********************************************************************//**
+Creates a foreign constraint memory object; most fields are set to 0/NULL.
+@return	own: foreign constraint struct */
+UNIV_INTERN
+dict_foreign_t*
+dict_mem_foreign_create(void);
+/*=========================*/
+
+/**********************************************************************//**
+Sets the foreign_table_name_lookup pointer based on the value of
+lower_case_table_names.  If that is 0 or 1, foreign_table_name_lookup
+will point to foreign_table_name.  If it is 2, then another string is
+allocated from the heap and set to lower case. */
+UNIV_INTERN
+void
+dict_mem_foreign_table_name_lookup_set(
+/*===================================*/
+	dict_foreign_t*	foreign,	/*!< in/out: foreign struct */
+	ibool		do_alloc);	/*!< in: TRUE if an alloc is needed */
+
+/**********************************************************************//**
+Sets the referenced_table_name_lookup pointer based on the value of
+lower_case_table_names.  If that is 0 or 1, referenced_table_name_lookup
+will point to referenced_table_name.  If it is 2, then another string is
+allocated from the heap and set to lower case. */
+UNIV_INTERN
+void
+dict_mem_referenced_table_name_lookup_set(
+/*======================================*/
+	dict_foreign_t*	foreign,	/*!< in/out: foreign struct */
+	ibool		do_alloc);	/*!< in: TRUE if an alloc is needed */
+
+/** Create a temporary tablename like "#sql-ibtid-inc" where
+  tid = the Table ID
+  inc = a randomly initialized number that is incremented for each file
+The table ID is a 64 bit integer, can use up to 20 digits, and is
+initialized at bootstrap. The second number is 32 bits, can use up to 10
+digits, and is initialized at startup to a randomly distributed number.
+It is hoped that the combination of these two numbers will provide a
+reasonably unique temporary file name.
+@param[in]	heap	A memory heap
+@param[in]	dbtab	Table name in the form database/table name
+@param[in]	id	Table id
+@return A reasonably unique temporary tablename suitable for InnoDB use */
+UNIV_INTERN
+char*
+dict_mem_create_temporary_tablename(
+	mem_heap_t*	heap,
+	const char*	dbtab,
+	table_id_t	id);
+
+/** Initialize dict memory variables. NOTE(review): presumably this seeds
+the counter used by dict_mem_create_temporary_tablename() - confirm. */
+void
+dict_mem_init(void);
+
+/** Data structure for a column in a table */
+struct dict_col_t{
+	/*----------------------*/
+	/** The following are copied from dtype_t,
+	so that all bit-fields can be packed tightly. */
+	/* @{ */
+	unsigned	prtype:32;	/*!< precise type; MySQL data
+					type, charset code, flags to
+					indicate nullability,
+					signedness, whether this is a
+					binary string, whether this is
+					a true VARCHAR where MySQL
+					uses 2 bytes to store the length */
+	unsigned	mtype:8;	/*!< main data type */
+
+	/* the remaining fields do not affect alphabetical ordering: */
+
+	unsigned	len:16;		/*!< length; for MySQL data this
+					is field->pack_length(),
+					except that for a >= 5.0.3
+					type true VARCHAR this is the
+					maximum byte length of the
+					string data (in addition to
+					the string, MySQL uses 1 or 2
+					bytes to store the string length) */
+
+	unsigned	mbminmaxlen:5;	/*!< minimum and maximum length of a
+					character, in bytes;
+					DATA_MBMINMAXLEN(mbminlen,mbmaxlen);
+					mbminlen=DATA_MBMINLEN(mbminmaxlen);
+					mbmaxlen=DATA_MBMAXLEN(mbminmaxlen) */
+	/*----------------------*/
+	/* End of definitions copied from dtype_t */
+	/* @} */
+
+	unsigned	ind:10;		/*!< table column position
+					(starting from 0) */
+	unsigned	ord_part:1;	/*!< nonzero if this column
+					appears in the ordering fields
+					of an index */
+	unsigned	max_prefix:12;	/*!< maximum index prefix length on
+					this column. Our current max limit is
+					3072 for Barracuda table */
+};
+
+/** @brief DICT_ANTELOPE_MAX_INDEX_COL_LEN is measured in bytes and
+is the maximum indexed column length (or indexed prefix length) in
+ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT. Also, in any format,
+any fixed-length field that is longer than this will be encoded as
+a variable-length field.
+
+It is set to 3*256 = 768, so that one can create a column prefix index on
+256 characters of a TEXT or VARCHAR column also in the UTF-8
+charset. In that charset, a character may take at most 3 bytes.  This
+constant MUST NOT BE CHANGED, or the compatibility of InnoDB data
+files would be at risk! */
+#define DICT_ANTELOPE_MAX_INDEX_COL_LEN	REC_ANTELOPE_MAX_INDEX_COL_LEN
+
+/** Maximum indexed column length, by table or by table flags: 767 bytes
+(REC_ANTELOPE_MAX_INDEX_COL_LEN - 1) for ROW_FORMAT=REDUNDANT and COMPACT,
+3072 bytes (REC_VERSION_56_MAX_INDEX_COL_LEN) for the Barracuda formats
+COMPRESSED and DYNAMIC. The _FLAG variant tests the atomic-blobs flag as a
+boolean; it must not be compared against a UNIV_FORMAT_* value. */
+#define DICT_MAX_FIELD_LEN_BY_FORMAT(table)				\
+		((dict_table_get_format(table) < UNIV_FORMAT_B)		\
+			? (REC_ANTELOPE_MAX_INDEX_COL_LEN - 1)		\
+			: REC_VERSION_56_MAX_INDEX_COL_LEN)
+
+#define DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags)			\
+		(DICT_TF_HAS_ATOMIC_BLOBS(flags)			\
+			? REC_VERSION_56_MAX_INDEX_COL_LEN		\
+			: (REC_ANTELOPE_MAX_INDEX_COL_LEN - 1))
+
+/** Maximum size in bytes of a column that is stored as fixed-length */
+#define DICT_MAX_FIXED_COL_LEN		DICT_ANTELOPE_MAX_INDEX_COL_LEN
+
+/** Data structure for a field in an index */
+struct dict_field_t{
+	dict_col_t*	col;		/*!< pointer to the table column */
+	const char*	name;		/*!< name of the column */
+	unsigned	prefix_len:12;	/*!< 0 or the length of the column
+					prefix in bytes in a MySQL index of
+					type, e.g., INDEX (textcol(25));
+					must be smaller than
+					DICT_MAX_FIELD_LEN_BY_FORMAT;
+					NOTE that for a multi-byte charset
+					such as UTF-8, MySQL sets this to
+					mbmaxlen * (prefix length in chars) */
+	unsigned	fixed_len:10;	/*!< 0 or the fixed length of the
+					column if smaller than
+					DICT_ANTELOPE_MAX_INDEX_COL_LEN */
+};
+
+/**********************************************************************//**
+PADDING HEURISTIC BASED ON LINEAR INCREASE OF PADDING TO AVOID
+COMPRESSION FAILURES
+(Note: this is relevant only for compressed indexes)
+GOAL: Avoid compression failures by maintaining information about the
+compressibility of data. If data is not very compressible then leave
+some extra space 'padding' in the uncompressed page making it more
+likely that compression of less than fully packed uncompressed page will
+succeed.
+
+This padding heuristic works by increasing the pad linearly until the
+desired failure rate is reached. A "round" is a fixed number of
+compression operations.
+After each round, the compression failure rate for that round is
+computed. If the failure rate is too high, then padding is incremented
+by a fixed value, otherwise it's left intact.
+If the compression failure is lower than the desired rate for a fixed
+number of consecutive rounds, then the padding is decreased by a fixed
+value. This is done to prevent overshooting the padding value,
+and to accommodate the possible change in data compressibility. */
+
+/** Number of zip ops (compression operations) in one round. */
+#define ZIP_PAD_ROUND_LEN			(128)
+
+/** Number of consecutive successful rounds before padding is decreased */
+#define ZIP_PAD_SUCCESSFUL_ROUND_LIMIT		(5)
+
+/** Amount by which padding is increased after a round that failed too often */
+#define ZIP_PAD_INCR				(128)
+
+/** Percentage of compression failures that are allowed in a single
+round of ZIP_PAD_ROUND_LEN compression operations */
+extern ulong	zip_failure_threshold_pct;
+
+/** Maximum percentage of a page that can be allowed as a pad to avoid
+compression failures */
+extern ulong	zip_pad_max;
+
+/** Data structure to hold information about how much space in
+an uncompressed page should be left as padding to avoid compression
+failures. This estimate is based on a self-adapting heuristic. */
+struct zip_pad_info_t {
+	os_fast_mutex_t	mutex;	/*!< mutex protecting the info */
+	ulint		pad;	/*!< number of bytes used as pad */
+	ulint		success;/*!< successful compression ops during
+				current round */
+	ulint		failure;/*!< failed compression ops during
+				current round */
+	ulint		n_rounds;/*!< number of consecutive successful
+				rounds so far */
+};
+
+/** Data structure for an index.  Most fields will be
+initialized to 0, NULL or FALSE in dict_mem_index_create(). */
+struct dict_index_t{
+	index_id_t	id;	/*!< id of the index */
+	mem_heap_t*	heap;	/*!< memory heap */
+	const char*	name;	/*!< index name */
+	const char*	table_name;/*!< table name */
+	dict_table_t*	table;	/*!< back pointer to table */
+#ifndef UNIV_HOTBACKUP
+	unsigned	space:32;
+				/*!< space where the index tree is placed */
+	unsigned	page:32;/*!< index tree root page number */
+#endif /* !UNIV_HOTBACKUP */
+	unsigned	type:DICT_IT_BITS;
+				/*!< index type (DICT_CLUSTERED, DICT_UNIQUE,
+				DICT_UNIVERSAL, DICT_IBUF, DICT_CORRUPT) */
+#define MAX_KEY_LENGTH_BITS 12
+	unsigned	trx_id_offset:MAX_KEY_LENGTH_BITS;
+				/*!< position of the trx id column
+				in a clustered index record, if the fields
+				before it are known to be of a fixed size,
+				0 otherwise */
+#if (1<<MAX_KEY_LENGTH_BITS) < MAX_KEY_LENGTH
+# error (1<<MAX_KEY_LENGTH_BITS) < MAX_KEY_LENGTH
+#endif
+	unsigned	n_user_defined_cols:10;
+				/*!< number of columns the user defined to
+				be in the index: in the internal
+				representation we add more columns */
+	unsigned	n_uniq:10;/*!< number of fields from the beginning
+				which are enough to determine an index
+				entry uniquely */
+	unsigned	n_def:10;/*!< number of fields defined so far */
+	unsigned	n_fields:10;/*!< number of fields in the index */
+	unsigned	n_nullable:10;/*!< number of nullable fields */
+	unsigned	cached:1;/*!< TRUE if the index object is in the
+				dictionary cache */
+	unsigned	to_be_dropped:1;
+				/*!< TRUE if the index is to be dropped;
+				protected by dict_operation_lock */
+	unsigned	online_status:2;
+				/*!< enum online_index_status.
+				Transitions from ONLINE_INDEX_COMPLETE (to
+				ONLINE_INDEX_CREATION) are protected
+				by dict_operation_lock and
+				dict_sys->mutex. Other changes are
+				protected by index->lock. */
+	dict_field_t*	fields;	/*!< array of field descriptions */
+#ifndef UNIV_HOTBACKUP
+	UT_LIST_NODE_T(dict_index_t)
+			indexes;/*!< list of indexes of the table */
+	btr_search_t*	search_info;
+				/*!< info used in optimistic searches */
+	row_log_t*	online_log;
+				/*!< the log of modifications
+				during online index creation;
+				valid when online_status is
+				ONLINE_INDEX_CREATION */
+	/*----------------------*/
+	/** Statistics for query optimization */
+	/* @{ */
+	ib_uint64_t*	stat_n_diff_key_vals;
+				/*!< approximate number of different
+				key values for this index, for each
+				n-column prefix where 1 <= n <=
+				dict_get_n_unique(index) (the array is
+				indexed from 0 to n_uniq-1); we
+				periodically calculate new
+				estimates */
+	ib_uint64_t*	stat_n_sample_sizes;
+				/*!< number of pages that were sampled
+				to calculate each of stat_n_diff_key_vals[],
+				e.g. stat_n_sample_sizes[3] pages were sampled
+				to get the number stat_n_diff_key_vals[3]. */
+	ib_uint64_t*	stat_n_non_null_key_vals;
+				/*!< approximate number of non-null key values
+				for this index, for each column where
+				1 <= n <= dict_get_n_unique(index) (the array
+				is indexed from 0 to n_uniq-1); This
+				is used when innodb_stats_method is
+				"nulls_ignored". */
+	ulint		stat_index_size;
+				/*!< approximate index size in
+				database pages */
+	ulint		stat_n_leaf_pages;
+				/*!< approximate number of leaf pages in the
+				index tree */
+	/* @} */
+	rw_lock_t	lock;	/*!< read-write lock protecting the
+				upper levels of the index tree */
+	trx_id_t	trx_id; /*!< id of the transaction that created this
+				index, or 0 if the index existed
+				when InnoDB was started up */
+	zip_pad_info_t	zip_pad;/*!< Information about state of
+				compression failures and successes */
+#endif /* !UNIV_HOTBACKUP */
+#ifdef UNIV_BLOB_DEBUG
+	ib_mutex_t		blobs_mutex;
+				/*!< mutex protecting blobs */
+	ib_rbt_t*	blobs;	/*!< map of (page_no,heap_no,field_no)
+				to first_blob_page_no; protected by
+				blobs_mutex; @see btr_blob_dbg_t */
+#endif /* UNIV_BLOB_DEBUG */
+#ifdef UNIV_DEBUG
+	ulint		magic_n;/*!< magic number */
+/** Value of dict_index_t::magic_n */
+# define DICT_INDEX_MAGIC_N	76789786
+#endif
+};
+
+/** Status of online index creation (dict_index_t::online_status, 2 bits) */
+enum online_index_status {
+	/** the index is complete and ready for access */
+	ONLINE_INDEX_COMPLETE = 0,
+	/** the index is being created, online
+	(allowing concurrent modifications) */
+	ONLINE_INDEX_CREATION,
+	/** secondary index creation was aborted and the index
+	should be dropped as soon as index->table->n_ref_count reaches 0,
+	or online table rebuild was aborted and the clustered index
+	of the original table should soon be restored to
+	ONLINE_INDEX_COMPLETE */
+	ONLINE_INDEX_ABORTED,
+	/** the online index creation was aborted, the index was
+	dropped from the data dictionary and the tablespace, and it
+	should be dropped from the data dictionary cache as soon as
+	index->table->n_ref_count reaches 0. */
+	ONLINE_INDEX_ABORTED_DROPPED
+};
+
+/** Data structure for a foreign key constraint; an example:
+FOREIGN KEY (A, B) REFERENCES TABLE2 (C, D).  Most fields will be
+initialized to 0, NULL or FALSE in dict_mem_foreign_create(). */
+struct dict_foreign_t{
+	mem_heap_t*	heap;		/*!< this object is allocated from
+					this memory heap */
+	char*		id;		/*!< id of the constraint as a
+					null-terminated string */
+	unsigned	n_fields:10;	/*!< number of indexes' first fields
+					for which the foreign key
+					constraint is defined: we allow the
+					indexes to contain more fields than
+					mentioned in the constraint, as long
+					as the first fields are as mentioned */
+	unsigned	type:6;		/*!< 0 or DICT_FOREIGN_ON_... flags
+					ORed (six flags, one bit each) */
+	char*		foreign_table_name;/*!< foreign table name */
+	char*		foreign_table_name_lookup;
+				/*!< foreign table name used for dict lookup */
+	dict_table_t*	foreign_table;	/*!< table where the foreign key is */
+	const char**	foreign_col_names;/*!< names of the columns in the
+					foreign key */
+	char*		referenced_table_name;/*!< referenced table name */
+	char*		referenced_table_name_lookup;
+				/*!< referenced table name for dict lookup*/
+	dict_table_t*	referenced_table;/*!< table where the referenced key
+					is */
+	const char**	referenced_col_names;/*!< names of the referenced
+					columns in the referenced table */
+	dict_index_t*	foreign_index;	/*!< foreign index; we require that
+					both tables contain explicitly defined
+					indexes for the constraint: InnoDB
+					does not generate new indexes
+					implicitly */
+	dict_index_t*	referenced_index;/*!< referenced index */
+};
+/** Stream insertion operator for a foreign key constraint. */
+std::ostream&
+operator<< (std::ostream& out, const dict_foreign_t& foreign);
+
+/** Function object that streams each foreign key constraint it is
+applied to into the std::ostream supplied at construction. */
+struct dict_foreign_print {
+	dict_foreign_print(std::ostream& out) : m_out(out) {}
+
+	void operator()(const dict_foreign_t* foreign)
+	{
+		m_out << *foreign;
+	}
+private:
+	std::ostream&	m_out;	/*!< destination stream */
+};
+
+/** Strict weak ordering of dict_foreign_t objects by their id strings.
+This is the comparator of dict_foreign_set, and hence it orders
+dict_table_t::foreign_set and dict_table_t::referenced_set. */
+struct dict_foreign_compare {
+	/** @return true if lhs->id sorts before rhs->id, false otherwise */
+	bool operator()(
+		const dict_foreign_t*	lhs,
+		const dict_foreign_t*	rhs) const
+	{
+		/* A negative comparison result means that lhs goes first. */
+		return(0 > ut_strcmp(lhs->id, rhs->id));
+	}
+};
+
+/** A predicate function object used to find a foreign key whose referenced
+index is the index given at construction. */
+struct dict_foreign_with_index {
+
+	dict_foreign_with_index(const dict_index_t*	index)
+		: m_index(index)
+	{}
+
+	/** @return true if foreign->referenced_index is the given index */
+	bool operator()(const dict_foreign_t*	foreign) const
+	{
+		return(m_index == foreign->referenced_index);
+	}
+	const dict_index_t*	m_index;	/*!< index to look for */
+};
+
+/** A predicate function object that tells whether a foreign key constraint
+spans two different tables, that is, whether its foreign table and its
+referenced table are distinct table objects. */
+struct dict_foreign_different_tables {
+
+	bool operator()(const dict_foreign_t*	foreign) const
+	{
+		return(foreign->referenced_table != foreign->foreign_table);
+	}
+};
+
+/** A predicate function object that matches a foreign key constraint by
+name, ignoring case.  The constraint matches if its full id equals the given
+name, or if the part of its id after the first '/' (that is, the id without
+the database name) equals the given name. */
+struct dict_foreign_matches_id {
+
+	dict_foreign_matches_id(const char* id)
+		: m_id(id)
+	{}
+
+	/** @return true if foreign->id matches m_id, false otherwise */
+	bool operator()(const dict_foreign_t*	foreign) const
+	{
+		if (innobase_strcasecmp(foreign->id, m_id) == 0) {
+			return(true);
+		}
+
+		/* Retry with the "database/" prefix stripped from the id. */
+		const char*	slash = strchr(foreign->id, '/');
+
+		return(slash && innobase_strcasecmp(m_id, slash + 1) == 0);
+	}
+
+	const char*	m_id;	/*!< constraint name to look for */
+};
+/** Set of foreign key constraints, ordered by dict_foreign_compare */
+typedef std::set<dict_foreign_t*, dict_foreign_compare> dict_foreign_set;
+/** Stream insertion operator for a set of foreign key constraints. */
+std::ostream&
+operator<< (std::ostream& out, const dict_foreign_set& fk_set);
+
+/** A predicate function object that is satisfied by the foreign keys that
+are absent from the foreign key set given at construction.  The set is
+searched with its own comparator (dict_foreign_compare). */
+struct dict_foreign_not_exists {
+	dict_foreign_not_exists(const dict_foreign_set& obj_)
+		: m_foreigns(obj_)
+	{}
+
+	/** @return true if the set holds no element equivalent to foreign */
+	bool operator()(dict_foreign_t* const & foreign) const {
+		return(m_foreigns.count(foreign) == 0);
+	}
+private:
+	const dict_foreign_set&	m_foreigns;	/*!< set to search */
+};
+
+/** Validate the search order (see dict_foreign_compare) of a foreign key set.
+@param[in]	fk_set	the foreign key set to be validated
+@return true if search order is fine in the set, false otherwise. */
+bool
+dict_foreign_set_validate(
+	const dict_foreign_set&	fk_set);
+
+/** Overload that validates the search order in both foreign key sets of a
+table (dict_table_t::foreign_set and dict_table_t::referenced_set).
+@param[in]	table	table whose foreign key sets are to be validated
+@return true if foreign key sets are fine, false otherwise. */
+bool
+dict_foreign_set_validate(
+	const dict_table_t&	table);
+
+/*********************************************************************//**
+Frees a foreign key struct by freeing the heap it is allocated from. */
+inline
+void
+dict_foreign_free(
+/*==============*/
+	dict_foreign_t*	foreign)	/*!< in, own: foreign key struct */
+{
+	mem_heap_free(foreign->heap);
+}
+
+/** A scope guard for a local set of foreign key constraints.  When the guard
+is destroyed, dict_foreign_free() is called on every constraint in the set.
+The set is held by reference and is not emptied, so after the guard is gone
+the set contains only dangling pointers. */
+struct dict_foreign_set_free {
+	dict_foreign_set_free(const dict_foreign_set&	foreign_set)
+		: m_foreign_set(foreign_set)
+	{}
+
+	~dict_foreign_set_free()
+	{
+		for (dict_foreign_set::const_iterator it
+			= m_foreign_set.begin();
+		     it != m_foreign_set.end(); ++it) {
+			dict_foreign_free(*it);
+		}
+	}
+	const dict_foreign_set&	m_foreign_set;	/*!< set to free on exit */
+};
+
+/** The flags for ON_UPDATE and ON_DELETE can be ORed; the default is that
+a foreign key constraint is enforced, therefore RESTRICT just means no flag */
+/* @{ */
+#define DICT_FOREIGN_ON_DELETE_CASCADE	1	/*!< ON DELETE CASCADE */
+#define DICT_FOREIGN_ON_DELETE_SET_NULL	2	/*!< ON DELETE SET NULL */
+#define DICT_FOREIGN_ON_UPDATE_CASCADE	4	/*!< ON UPDATE CASCADE */
+#define DICT_FOREIGN_ON_UPDATE_SET_NULL	8	/*!< ON UPDATE SET NULL */
+#define DICT_FOREIGN_ON_DELETE_NO_ACTION 16	/*!< ON DELETE NO ACTION */
+#define DICT_FOREIGN_ON_UPDATE_NO_ACTION 32	/*!< ON UPDATE NO ACTION */
+/* @} */
+
+/* This flag synchronizes SQL DDL with memcached DML: while
+table->memcached_sync_count == DICT_TABLE_IN_DDL, DDL is running on the
+table and DML from memcached is blocked. */
+#define DICT_TABLE_IN_DDL (-1)
+
+/** Data structure for a database table.  Most fields will be
+initialized to 0, NULL or FALSE in dict_mem_table_create(). */
+struct dict_table_t{
+
+
+	table_id_t	id;	/*!< id of the table */
+	mem_heap_t*	heap;	/*!< memory heap */
+	char*		name;	/*!< table name */
+	const char*	dir_path_of_temp_table;/*!< NULL or the directory path
+				where a TEMPORARY table that was explicitly
+				created by a user should be placed if
+				innodb_file_per_table is defined in my.cnf;
+				in Unix this is usually /tmp/..., in Windows
+				temp\... */
+	char*		data_dir_path; /*!< NULL or the directory path
+				specified by DATA DIRECTORY */
+	unsigned	space:32;
+				/*!< space where the clustered index of the
+				table is placed */
+	unsigned	flags:DICT_TF_BITS;	/*!< DICT_TF_... */
+	unsigned	flags2:DICT_TF2_BITS;	/*!< DICT_TF2_... */
+	unsigned	ibd_file_missing:1;
+				/*!< TRUE if this is in a single-table
+				tablespace and the .ibd file is missing; then
+				we must return in ha_innodb.cc an error if the
+				user tries to query such an orphaned table */
+	unsigned	cached:1;/*!< TRUE if the table object has been added
+				to the dictionary cache */
+	unsigned	to_be_dropped:1;
+				/*!< TRUE if the table is to be dropped, but
+				not yet actually dropped (it could be in the
+				background drop list). It is turned on at the
+				beginning of row_drop_table_for_mysql() and
+				turned off just before we start to update
+				system tables for the drop. It is protected
+				by dict_operation_lock */
+	unsigned	n_def:10;/*!< number of columns defined so far */
+	unsigned	n_cols:10;/*!< number of columns */
+	unsigned	can_be_evicted:1;
+				/*!< TRUE if it's not an InnoDB system table
+				or a table that has no FK relationships */
+	unsigned	corrupted:1;
+				/*!< TRUE if table is corrupted */
+	unsigned	drop_aborted:1;
+				/*!< TRUE if some indexes should be dropped
+				after ONLINE_INDEX_ABORTED
+				or ONLINE_INDEX_ABORTED_DROPPED */
+	dict_col_t*	cols;	/*!< array of column descriptions */
+	const char*	col_names;
+				/*!< Column names packed in a character string
+				"name1\0name2\0...nameN\0".  Until
+				the string contains n_cols, it will be
+				allocated from a temporary heap.  The final
+				string will be allocated from table->heap. */
+#ifndef UNIV_HOTBACKUP
+	hash_node_t	name_hash; /*!< hash chain node */
+	hash_node_t	id_hash; /*!< hash chain node */
+	UT_LIST_BASE_NODE_T(dict_index_t)
+			indexes; /*!< list of indexes of the table */
+
+	dict_foreign_set	foreign_set;
+				/*!< set of foreign key constraints
+				in the table; these refer to columns
+				in other tables */
+
+	dict_foreign_set	referenced_set;
+				/*!< set of foreign key constraints
+				which refer to this table */
+
+	UT_LIST_NODE_T(dict_table_t)
+			table_LRU; /*!< node of the LRU list of tables */
+	unsigned	fk_max_recusive_level:8;
+				/*!< maximum recursive level we support when
+				loading tables chained together with FK
+				constraints. If exceeds this level, we will
+				stop loading child table into memory along with
+				its parent table */
+	ulint		n_foreign_key_checks_running;
+				/*!< count of how many foreign key check
+				operations are currently being performed
+				on the table: we cannot drop the table while
+				there are foreign key checks running on
+				it! */
+	trx_id_t	def_trx_id;
+				/*!< transaction id that last touched
+				the table definition, either when
+				loading the definition or CREATE
+				TABLE, or ALTER TABLE (prepare,
+				commit, and rollback phases) */
+	trx_id_t	query_cache_inv_trx_id;
+				/*!< transactions whose trx id is
+				smaller than this number are not
+				allowed to store to the MySQL query
+				cache or retrieve from it; when a trx
+				with undo logs commits, it sets this
+				to the value of the trx id counter for
+				the tables it had an IX lock on */
+#ifdef UNIV_DEBUG
+	/*----------------------*/
+	ibool		does_not_fit_in_memory;
+				/*!< this field is used to specify in
+				simulations tables which are so big
+				that disk should be accessed: disk
+				access is simulated by putting the
+				thread to sleep for a while; NOTE that
+				this flag is not stored to the data
+				dictionary on disk, and the database
+				will forget about value TRUE if it has
+				to reload the table definition from
+				disk */
+#endif /* UNIV_DEBUG */
+	/*----------------------*/
+	unsigned	big_rows:1;
+				/*!< flag: TRUE if the maximum length of
+				a single row exceeds BIG_ROW_SIZE;
+				initialized in dict_table_add_to_cache() */
+				/** Statistics for query optimization */
+				/* @{ */
+
+	volatile os_once::state_t	stats_latch_created;
+				/*!< Creation state of 'stats_latch'. */
+
+	rw_lock_t*	stats_latch; /*!< this latch protects:
+				dict_table_t::stat_initialized
+				dict_table_t::stat_n_rows (*)
+				dict_table_t::stat_clustered_index_size
+				dict_table_t::stat_sum_of_other_index_sizes
+				dict_table_t::stat_modified_counter (*)
+				dict_table_t::indexes*::stat_n_diff_key_vals[]
+				dict_table_t::indexes*::stat_index_size
+				dict_table_t::indexes*::stat_n_leaf_pages
+				(*) those are not always protected for
+				performance reasons */
+	unsigned	stat_initialized:1; /*!< TRUE if statistics have
+				been calculated the first time
+				after database startup or table creation */
+#define DICT_TABLE_IN_USED      -1
+	lint		memcached_sync_count;
+				/*!< count of how many handles are opened
+				to this table from memcached; DDL on the
+				table is NOT allowed until this count
+				goes to zero. If it's DICT_TABLE_IN_DDL (-1),
+				there is DDL running on the table and DML
+				from memcached will be blocked. */
+	ib_time_t	stats_last_recalc;
+				/*!< Timestamp of last recalc of the stats */
+	ib_uint32_t	stat_persistent;
+				/*!< The two bits below are set in the
+				::stat_persistent member and have the following
+				meaning:
+				1. _ON=0, _OFF=0, no explicit persistent stats
+				setting for this table, the value of the global
+				srv_stats_persistent is used to determine
+				whether the table has persistent stats enabled
+				or not
+				2. _ON=0, _OFF=1, persistent stats are
+				explicitly disabled for this table, regardless
+				of the value of the global srv_stats_persistent
+				3. _ON=1, _OFF=0, persistent stats are
+				explicitly enabled for this table, regardless
+				of the value of the global srv_stats_persistent
+				4. _ON=1, _OFF=1, not allowed, we assert if
+				this ever happens. */
+#define DICT_STATS_PERSISTENT_ON	(1 << 1)
+#define DICT_STATS_PERSISTENT_OFF	(1 << 2)
+	ib_uint32_t	stats_auto_recalc;
+				/*!< The two bits below are set in the
+				::stats_auto_recalc member and have
+				the following meaning:
+				1. _ON=0, _OFF=0, no explicit auto recalc
+				setting for this table, the value of the global
+				srv_stats_persistent_auto_recalc is used to
+				determine whether the table has auto recalc
+				enabled or not
+				2. _ON=0, _OFF=1, auto recalc is explicitly
+				disabled for this table, regardless of the
+				value of the global
+				srv_stats_persistent_auto_recalc
+				3. _ON=1, _OFF=0, auto recalc is explicitly
+				enabled for this table, regardless of the
+				value of the global
+				srv_stats_persistent_auto_recalc
+				4. _ON=1, _OFF=1, not allowed, we assert if
+				this ever happens. */
+#define DICT_STATS_AUTO_RECALC_ON	(1 << 1)
+#define DICT_STATS_AUTO_RECALC_OFF	(1 << 2)
+	ulint		stats_sample_pages;
+				/*!< the number of pages to sample for this
+				table during persistent stats estimation;
+				if this is 0, then the value of the global
+				srv_stats_persistent_sample_pages will be
+				used instead. */
+	ib_uint64_t	stat_n_rows;
+				/*!< approximate number of rows in the table;
+				we periodically calculate new estimates */
+	ulint		stat_clustered_index_size;
+				/*!< approximate clustered index size in
+				database pages */
+	ulint		stat_sum_of_other_index_sizes;
+				/*!< other indexes in database pages */
+	ib_uint64_t	stat_modified_counter;
+				/*!< when a row is inserted, updated,
+				or deleted,
+				we add 1 to this number; we calculate new
+				estimates for the stat_... values for the
+				table and the indexes when about 1 / 16 of
+				table has been modified;
+				also when the estimate operation is
+				called for MySQL SHOW TABLE STATUS; the
+				counter is reset to zero at statistics
+				calculation; this counter is not protected by
+				any latch, because this is only used for
+				heuristics */
+#define BG_STAT_NONE		0
+#define BG_STAT_IN_PROGRESS	(1 << 0)
+				/*!< BG_STAT_IN_PROGRESS is set in
+				stats_bg_flag when the background
+				stats code is working on this table. The DROP
+				TABLE code waits for this to be cleared
+				before proceeding. */
+#define BG_STAT_SHOULD_QUIT	(1 << 1)
+				/*!< BG_STAT_SHOULD_QUIT is set in
+				stats_bg_flag when DROP TABLE starts
+				waiting on BG_STAT_IN_PROGRESS to be cleared,
+				the background stats thread will detect this
+				and will eventually quit sooner */
+	byte		stats_bg_flag;
+				/*!< see BG_STAT_* above.
+				Writes are covered by dict_sys->mutex.
+				Dirty reads are possible. */
+				/* @} */
+	/*----------------------*/
+				/** The following fields are used by the
+				AUTOINC code.  The actual collection of
+				tables locked during AUTOINC read/write is
+				kept in trx_t. In order to quickly determine
+				whether a transaction has locked the AUTOINC
+				lock we keep a pointer to the transaction
+				here in the autoinc_trx variable. This is to
+				avoid acquiring the lock_sys_t::mutex and
+				scanning the vector in trx_t.
+
+				When an AUTOINC lock has to wait, the
+				corresponding lock instance is created on
+				the trx lock heap rather than use the
+				pre-allocated instance in autoinc_lock below.*/
+				/* @{ */
+	lock_t*		autoinc_lock;
+				/*!< a buffer for an AUTOINC lock
+				for this table: we allocate the memory here
+				so that individual transactions can get it
+				and release it without a need to allocate
+				space from the lock heap of the trx:
+				otherwise the lock heap would grow rapidly
+				if we do a large insert from a select */
+	ib_mutex_t		autoinc_mutex;
+				/*!< mutex protecting the autoincrement
+				counter */
+	ib_uint64_t	autoinc;/*!< autoinc counter value to give to the
+				next inserted row */
+	ulong		n_waiting_or_granted_auto_inc_locks;
+				/*!< This counter is used to track the number
+				of granted and pending autoinc locks on this
+				table. This value is set after acquiring the
+				lock_sys_t::mutex but we peek the contents to
+				determine whether other transactions have
+				acquired the AUTOINC lock or not. Of course
+				only one transaction can be granted the
+				lock but there can be multiple waiters. */
+	const trx_t*	autoinc_trx;
+				/*!< The transaction that currently holds
+				the AUTOINC lock on this table.
+				Protected by lock_sys->mutex. */
+	fts_t*		fts;	/*!< FTS specific state variables */
+				/* @} */
+	/*----------------------*/
+
+	ib_quiesce_t	 quiesce;/*!< Quiescing states, protected by the
+				dict_index_t::lock. ie. we can only change
+				the state if we acquire all the latches
+				(dict_index_t::lock) in X mode of this table's
+				indexes. */
+
+	/*----------------------*/
+	ulint		n_rec_locks;
+				/*!< Count of the number of record locks on
+				this table. We use this to determine whether
+				we can evict the table from the dictionary
+				cache. It is protected by lock_sys->mutex. */
+	ulint		n_ref_count;
+				/*!< count of how many handles are opened
+				to this table; dropping of the table is
+				NOT allowed until this count gets to zero;
+				MySQL does NOT itself check the number of
+				open handles at drop */
+	UT_LIST_BASE_NODE_T(lock_t)
+			locks;	/*!< list of locks on the table; protected
+				by lock_sys->mutex */
+#endif /* !UNIV_HOTBACKUP */
+
+#ifdef UNIV_DEBUG
+	ulint		magic_n;/*!< magic number */
+/** Value of dict_table_t::magic_n */
+# define DICT_TABLE_MAGIC_N	76333786
+#endif /* UNIV_DEBUG */
+};
+
+/** Function object that adds a foreign key constraint to the referenced_set
+of its referenced table, if foreign->referenced_table is set. */
+struct dict_foreign_add_to_referenced_table {
+	void operator()(dict_foreign_t*	foreign) const
+	{
+		dict_table_t*	ref = foreign->referenced_table;
+		if (ref) {
+			bool	ok = ref->referenced_set.insert(foreign).second;
+			ut_a(ok);	/* must not be in the set already */
+		}
+	}
+};
+
+#ifndef UNIV_NONINL
+#include "dict0mem.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/dict0mem.ic b/storage/innobase/include/dict0mem.ic
new file mode 100644
index 00000000000..38d51f61789
--- /dev/null
+++ b/storage/innobase/include/dict0mem.ic
@@ -0,0 +1,74 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/dict0mem.ic
+Data dictionary memory object creation
+
+Created 1/8/1996 Heikki Tuuri
+***********************************************************************/
+
+#include "data0type.h"
+#include "dict0mem.h"
+#include "fil0fil.h"
+
+/**********************************************************************//**
+Populates a dict_index_t index memory structure with the supplied
+information. */
+UNIV_INLINE
+void
+dict_mem_fill_index_struct(
+/*=======================*/
+	dict_index_t*	index,		/*!< out: index to be filled */
+	mem_heap_t*	heap,		/*!< in: memory heap, or NULL */
+	const char*	table_name,	/*!< in: table name */
+	const char*	index_name,	/*!< in: index name */
+	ulint		space,		/*!< in: space where the index tree is
+					placed, ignored if the index is of
+					the clustered type */
+	ulint		type,		/*!< in: DICT_UNIQUE,
+					DICT_CLUSTERED, ... ORed */
+	ulint		n_fields)	/*!< in: number of fields */
+{
+	index->heap = heap;
+	index->table_name = table_name;
+
+	if (heap == NULL) {
+		/* Without a heap the name is shared, not copied. */
+		index->name = index_name;
+		index->fields = NULL;
+	} else {
+		index->name = mem_heap_strdup(heap, index_name);
+		/* The '1 +' prevents allocation of an empty mem block */
+		index->fields = (dict_field_t*) mem_heap_alloc(
+			heap, 1 + n_fields * sizeof(dict_field_t));
+	}
+
+	index->n_fields = (unsigned int) n_fields;
+	/* Assign a ulint to a 4-bit-mapped field.
+	Only the low-order 4 bits are assigned. */
+	index->type = type;
+#ifndef UNIV_HOTBACKUP
+	index->page = FIL_NULL;
+	index->space = (unsigned int) space;
+#endif /* !UNIV_HOTBACKUP */
+
+#ifdef UNIV_DEBUG
+	index->magic_n = DICT_INDEX_MAGIC_N;
+#endif /* UNIV_DEBUG */
+}
diff --git a/storage/innobase/include/dict0priv.h b/storage/innobase/include/dict0priv.h
new file mode 100644
index 00000000000..9a3c8e22992
--- /dev/null
+++ b/storage/innobase/include/dict0priv.h
@@ -0,0 +1,63 @@
+/*****************************************************************************
+
+Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0priv.h
+Data dictionary private functions
+
+Created  Fri 2 Jul 2010 13:30:38 EST - Sunny Bains
+*******************************************************/
+
+#ifndef dict0priv_h
+#define dict0priv_h
+
+/**********************************************************************//**
+Gets a table by name; loads it to the dictionary cache if it is not there
+yet. A low-level function, not to be called from outside dict0* code.
+@return	table, NULL if not found or if corrupted and not force-loaded */
+UNIV_INLINE
+dict_table_t*
+dict_table_get_low(
+/*===============*/
+	const char*	table_name);		/*!< in: table name */
+
+/**********************************************************************//**
+Looks a table up by name in the dictionary cache, without loading it.
+@return	table, NULL if not found */
+UNIV_INLINE
+dict_table_t*
+dict_table_check_if_in_cache_low(
+/*=============================*/
+	const char*	table_name);		/*!< in: table name */
+
+/**********************************************************************//**
+Returns a table object based on table id, loading it if it is not cached.
+@return	table, NULL if does not exist */
+UNIV_INLINE
+dict_table_t*
+dict_table_open_on_id_low(
+/*=====================*/
+	table_id_t		table_id,	/*!< in: table id */
+	dict_err_ignore_t	ignore_err);	/*!< in: errors to ignore
+						when loading the table */
+
+#ifndef UNIV_NONINL
+#include "dict0priv.ic"
+#endif
+
+#endif /* dict0priv.h */
diff --git a/storage/innobase/include/dict0priv.ic b/storage/innobase/include/dict0priv.ic
new file mode 100644
index 00000000000..30ba8fb60aa
--- /dev/null
+++ b/storage/innobase/include/dict0priv.ic
@@ -0,0 +1,125 @@
+/*****************************************************************************
+
+Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/dict0priv.ic
+Data dictionary system private include file
+
+Created  Wed 13 Oct 2010 16:10:14 EST Sunny Bains
+***********************************************************************/
+
+#include "dict0dict.h"
+#include "dict0load.h"
+#include "dict0priv.h"
+#ifndef UNIV_HOTBACKUP
+
+/**********************************************************************//**
+Looks up a table by name in the dictionary cache and loads it when it is
+not cached yet. A low-level function.
+@return	table, NULL if not found or if corrupted and not force-loaded */
+UNIV_INLINE
+dict_table_t*
+dict_table_get_low(
+/*===============*/
+	const char*	table_name)	/*!< in: table name */
+{
+	dict_table_t*	table;
+
+	ut_ad(table_name);
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+
+	table = dict_table_check_if_in_cache_low(table_name);
+
+	if (table == NULL) {
+		/* Not in the cache: try to load it. */
+		table = dict_load_table(table_name, TRUE, DICT_ERR_IGNORE_NONE);
+	} else if (table->corrupted) {
+		fprintf(stderr, "InnoDB: table");
+		ut_print_name(stderr, NULL, TRUE, table->name);
+
+		if (!srv_load_corrupted) {
+			fputs(" is corrupted\n", stderr);
+			return(NULL);
+		}
+
+		fputs(" is corrupted, but"
+		      " innodb_force_load_corrupted is set\n", stderr);
+	}
+
+	ut_ad(!table || table->cached);
+
+	return(table);
+}
+
+/**********************************************************************//**
+Returns a table object based on table id, loading it if it is not cached.
+@return	table, NULL if does not exist */
+UNIV_INLINE
+dict_table_t*
+dict_table_open_on_id_low(
+/*======================*/
+	table_id_t		table_id,	/*!< in: table id */
+	dict_err_ignore_t	ignore_err)	/*!< in: errors to ignore
+						when loading the table */
+{
+	dict_table_t*	table;
+	ulint		fold;
+
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+
+	/* Look for the table id in dict_sys->table_id_hash */
+	fold = ut_fold_ull(table_id);
+
+	HASH_SEARCH(id_hash, dict_sys->table_id_hash, fold,
+		    dict_table_t*, table, ut_ad(table->cached),
+		    table->id == table_id);
+	if (table == NULL) {
+		table = dict_load_table_on_id(table_id, ignore_err);
+	}
+
+	ut_ad(!table || table->cached);
+
+	/* TODO: should get the type information from MySQL */
+
+	return(table);
+}
+
+/**********************************************************************//**
+Looks a table up by name in the dictionary cache, without loading it.
+@return	table, NULL if not found */
+UNIV_INLINE
+dict_table_t*
+dict_table_check_if_in_cache_low(
+/*=============================*/
+	const char*	table_name)	/*!< in: table name */
+{
+	dict_table_t*	table;
+	ulint		table_fold;
+
+	ut_ad(table_name);
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+
+	/* Fold the name and search dict_sys->table_hash for an exact match */
+	table_fold = ut_fold_string(table_name);
+
+	HASH_SEARCH(name_hash, dict_sys->table_hash, table_fold,
+		    dict_table_t*, table, ut_ad(table->cached),
+		    !strcmp(table->name, table_name));
+	return(table);
+}
+#endif /*! UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/dict0stats.h b/storage/innobase/include/dict0stats.h
new file mode 100644
index 00000000000..186f90e3694
--- /dev/null
+++ b/storage/innobase/include/dict0stats.h
@@ -0,0 +1,202 @@
+/*****************************************************************************
+
+Copyright (c) 2009, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0stats.h
+Code used for calculating and manipulating table statistics.
+
+Created Jan 06, 2010 Vasil Dimov
+*******************************************************/
+
+#ifndef dict0stats_h
+#define dict0stats_h
+
+#include "univ.i"
+
+#include "db0err.h"
+#include "dict0types.h"
+#include "trx0types.h"
+
+enum dict_stats_upd_option_t { /* modes of dict_stats_update() */
+	DICT_STATS_RECALC_PERSISTENT,/* (re) calculate the
+				statistics using a precise and slow
+				algorithm and save them to the persistent
+				storage; if the persistent storage is
+				not present then emit a warning and
+				fall back to transient stats */
+	DICT_STATS_RECALC_TRANSIENT,/* (re) calculate the statistics
+				using an imprecise quick algorithm
+				without saving the results
+				persistently */
+	DICT_STATS_EMPTY_TABLE,	/* Write all zeros (or 1 where it makes sense)
+				into a table and its indexes' statistics
+				members. The resulting stats correspond to an
+				empty table. If the table is using persistent
+				statistics, then they are saved on disk. */
+	DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY /* fetch the stats
+				from the persistent storage if the in-memory
+				structures have not been initialized yet,
+				otherwise do nothing */
+};
+
+/*********************************************************************//**
+Calculates new estimates for table and index statistics. This function
+is relatively quick and is used to calculate transient statistics that
+are not saved on disk.
+This was the only way to calculate statistics before the
+Persistent Statistics feature was introduced. */
+UNIV_INTERN
+void
+dict_stats_update_transient(
+/*========================*/
+	dict_table_t*	table);	/*!< in/out: table */
+
+/*********************************************************************//**
+Set the persistent statistics flag for a table, in the in-memory table
+object only (not saved on disk; re-read from the .frm file on first open
+after a server restart). If both ps_on and ps_off are set, ps_on wins. */
+UNIV_INLINE
+void
+dict_stats_set_persistent(
+/*======================*/
+	dict_table_t*	table,	/*!< in/out: table */
+	ibool		ps_on,	/*!< in: persistent stats explicitly enabled */
+	ibool		ps_off)	/*!< in: persistent stats explicitly disabled */
+	__attribute__((nonnull));
+
+/*********************************************************************//**
+Check whether persistent statistics is enabled for a given table.
+@return TRUE if enabled, FALSE otherwise */
+UNIV_INLINE
+ibool
+dict_stats_is_persistent_enabled(
+/*=============================*/
+	const dict_table_t*	table)	/*!< in: table */
+	__attribute__((nonnull, warn_unused_result));
+
+/*********************************************************************//**
+Set the auto recalc flag for a given table (only honored for a persistent
+stats enabled table). The flag is set only in the in-memory table object
+and is not saved in InnoDB files. It will be read from the .frm file upon
+first open from MySQL after a server restart. */
+UNIV_INLINE
+void
+dict_stats_auto_recalc_set(
+/*=======================*/
+	dict_table_t*	table,			/*!< in/out: table */
+	ibool		auto_recalc_on,		/*!< in: explicitly enabled */
+	ibool		auto_recalc_off);	/*!< in: explicitly disabled */
+
+/*********************************************************************//**
+Check whether auto recalc is enabled for a given table.
+@return TRUE if enabled, FALSE otherwise */
+UNIV_INLINE
+ibool
+dict_stats_auto_recalc_is_enabled(
+/*==============================*/
+	const dict_table_t*	table);	/*!< in: table */
+
+/*********************************************************************//**
+Initialize table's stats for the first time when opening a table. */
+UNIV_INLINE
+void
+dict_stats_init(
+/*============*/
+	dict_table_t*	table);	/*!< in/out: table */
+
+/*********************************************************************//**
+Deinitialize table's stats after the last close of the table. This is
+used to detect "FLUSH TABLE" and refresh the stats upon next open. */
+UNIV_INLINE
+void
+dict_stats_deinit(
+/*==============*/
+	dict_table_t*	table)	/*!< in/out: table */
+	__attribute__((nonnull));
+
+/*********************************************************************//**
+Calculates new estimates for table and index statistics. The statistics
+are used in query optimization.
+@return DB_* error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_stats_update(
+/*==============*/
+	dict_table_t*		table,	/*!< in/out: table */
+	dict_stats_upd_option_t	stats_upd_option);
+					/*!< in: whether to (re) calc
+					the stats or to fetch them from
+					the persistent storage */
+
+/*********************************************************************//**
+Removes the information for a particular index's stats from the persistent
+storage if it exists and if there is data stored for this index.
+This function creates its own trx and commits it.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+dict_stats_drop_index(
+/*==================*/
+	const char*	tname,	/*!< in: table name */
+	const char*	iname,	/*!< in: index name */
+	char*		errstr, /*!< out: error message if != DB_SUCCESS
+				is returned */
+	ulint		errstr_sz);/*!< in: size of the errstr buffer */
+
+/*********************************************************************//**
+Removes the statistics for a table and all of its indexes from the
+persistent storage if it exists and if there is data stored for the table.
+This function creates its own transaction and commits it.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+dict_stats_drop_table(
+/*==================*/
+	const char*	table_name,	/*!< in: table name */
+	char*		errstr,		/*!< out: error message
+					if != DB_SUCCESS is returned */
+	ulint		errstr_sz);	/*!< in: size of errstr buffer */
+
+/*********************************************************************//**
+Fetches or calculates new estimates for index statistics. */
+UNIV_INTERN
+void
+dict_stats_update_for_index(
+/*========================*/
+	dict_index_t*	index)	/*!< in/out: index */
+	__attribute__((nonnull));
+
+/*********************************************************************//**
+Renames a table in InnoDB persistent stats storage.
+This function creates its own transaction and commits it.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+dict_stats_rename_table(
+/*====================*/
+	const char*	old_name,	/*!< in: old table name */
+	const char*	new_name,	/*!< in: new table name */
+	char*		errstr,		/*!< out: error string if != DB_SUCCESS
+					is returned */
+	size_t		errstr_sz);	/*!< in: errstr size */
+
+#ifndef UNIV_NONINL
+#include "dict0stats.ic"
+#endif
+
+#endif /* dict0stats_h */
diff --git a/storage/innobase/include/dict0stats.ic b/storage/innobase/include/dict0stats.ic
new file mode 100644
index 00000000000..ec9a9065470
--- /dev/null
+++ b/storage/innobase/include/dict0stats.ic
@@ -0,0 +1,236 @@
+/*****************************************************************************
+
+Copyright (c) 2012, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0stats.ic
+Code used for calculating and manipulating table statistics.
+
+Created Jan 23, 2012 Vasil Dimov
+*******************************************************/
+
+#include "univ.i"
+#include "dict0dict.h" /* dict_table_stats_lock() */
+#include "dict0types.h" /* dict_table_t */
+#include "srv0srv.h" /* srv_stats_persistent, srv_stats_auto_recalc */
+
+/*********************************************************************//**
+Set the persistent statistics flag for a table, in the in-memory table
+object only (not saved on disk; re-read from the .frm file on first open
+from MySQL after a server restart). */
+UNIV_INLINE
+void
+dict_stats_set_persistent(
+/*======================*/
+	dict_table_t*	table,	/*!< in/out: table */
+	ibool		ps_on,	/*!< in: persistent stats explicitly enabled */
+	ibool		ps_off)	/*!< in: persistent stats explicitly disabled */
+{
+	/* Not allowed to have both flags set, but a CREATE or ALTER
+	statement that contains "STATS_PERSISTENT=0 STATS_PERSISTENT=1" would
+	end up having both set. In this case the OFF request is dropped,
+	which is why ps_on is tested first below. */
+
+	ib_uint32_t	stat_persistent;
+
+	if (ps_on) {
+		stat_persistent = DICT_STATS_PERSISTENT_ON;
+	} else if (ps_off) {
+		stat_persistent = DICT_STATS_PERSISTENT_OFF;
+	} else {
+		/* neither explicitly enabled nor explicitly disabled */
+		stat_persistent = 0;
+	}
+
+	/* we rely on this assignment to be atomic; the flag is read
+	without locking by dict_stats_is_persistent_enabled() */
+	table->stat_persistent = stat_persistent;
+}
+
+/*********************************************************************//**
+Tell whether persistent statistics are in effect for a given table.
+@return TRUE if enabled, FALSE otherwise */
+UNIV_INLINE
+ibool
+dict_stats_is_persistent_enabled(
+/*=============================*/
+	const dict_table_t*	table)	/*!< in: table */
+{
+	/* This check is non-locking, so the table may become PS-disabled
+	right after TRUE has been returned, or PS-enabled right after FALSE
+	has been returned. Consequently we may end up doing
+	dict_stats_update(DICT_STATS_RECALC_PERSISTENT) on a table that has
+	just been PS-disabled, or
+	dict_stats_update(DICT_STATS_RECALC_TRANSIENT) on a table that has
+	just been PS-enabled. This is acceptable: avoiding it would require
+	protecting ::stat_persistent with dict_table_stats_lock() like the
+	other ::stat_ members, which would be too big a performance penalty,
+	especially when called from row_update_statistics_if_needed(). */
+
+	/* we rely on this read to be atomic */
+	const ib_uint32_t	stat_persistent = table->stat_persistent;
+
+	if (stat_persistent & DICT_STATS_PERSISTENT_ON) {
+		ut_ad(!(stat_persistent & DICT_STATS_PERSISTENT_OFF));
+		return(TRUE);
+	}
+
+	if (stat_persistent & DICT_STATS_PERSISTENT_OFF) {
+		return(FALSE);
+	}
+
+	/* neither bit is set for this table: the answer comes from
+	srv_stats_persistent */
+	return(srv_stats_persistent);
+}
+
+/*********************************************************************//**
+Set the auto recalc flag for a given table (only honored for a persistent
+stats enabled table). Only the in-memory table object is changed; nothing
+is saved in InnoDB files. The flag will be read from the .frm file upon
+first open from MySQL after a server restart. */
+UNIV_INLINE
+void
+dict_stats_auto_recalc_set(
+/*=======================*/
+	dict_table_t*	table,			/*!< in/out: table */
+	ibool		auto_recalc_on,		/*!< in: explicitly enabled */
+	ibool		auto_recalc_off)	/*!< in: explicitly disabled */
+{
+	ut_ad(!auto_recalc_on || !auto_recalc_off);
+
+	/* Translate each request into its flag bit, 0 when not requested.
+	If both were passed despite the debug assertion above, both bits
+	end up set. */
+	const ib_uint32_t	on_bit
+		= auto_recalc_on ? DICT_STATS_AUTO_RECALC_ON : 0;
+	const ib_uint32_t	off_bit
+		= auto_recalc_off ? DICT_STATS_AUTO_RECALC_OFF : 0;
+
+	const ib_uint32_t	stats_auto_recalc = on_bit | off_bit;
+
+	/* we rely on this assignment to be atomic */
+	table->stats_auto_recalc = stats_auto_recalc;
+}
+
+/*********************************************************************//**
+Tell whether automatic stats recalculation is in effect for a given table.
+@return TRUE if enabled, FALSE otherwise */
+UNIV_INLINE
+ibool
+dict_stats_auto_recalc_is_enabled(
+/*==============================*/
+	const dict_table_t*	table)	/*!< in: table */
+{
+	/* we rely on this read to be atomic */
+	const ib_uint32_t	stats_auto_recalc = table->stats_auto_recalc;
+	if (stats_auto_recalc & DICT_STATS_AUTO_RECALC_ON) {
+		ut_ad(!(stats_auto_recalc & DICT_STATS_AUTO_RECALC_OFF));
+		return(TRUE);
+	}
+
+	if (stats_auto_recalc & DICT_STATS_AUTO_RECALC_OFF) {
+		return(FALSE);
+	}
+	return(srv_stats_auto_recalc);
+}
+
+/*********************************************************************//**
+Set up a table's stats on first open; a no-op once they are initialized. */
+UNIV_INLINE
+void
+dict_stats_init(
+/*============*/
+	dict_table_t*	table)	/*!< in/out: table */
+{
+	ut_ad(!mutex_own(&dict_sys->mutex));
+
+	/* nothing to do if the stats have been initialized already */
+	if (!table->stat_initialized) {
+		/* With persistent stats enabled, fetch the saved stats
+		unless they are already in memory; otherwise calculate
+		transient stats. */
+		const dict_stats_upd_option_t	opt
+			= dict_stats_is_persistent_enabled(table)
+			? DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY
+			: DICT_STATS_RECALC_TRANSIENT;
+
+		/* the returned error code is not checked here */
+		dict_stats_update(table, opt);
+	}
+}
+
+/*********************************************************************//**
+Mark a table's stats as uninitialized after the last close of the table,
+so that "FLUSH TABLE" is detected and the stats are refreshed on next open. */
+UNIV_INLINE
+void
+dict_stats_deinit(
+/*==============*/
+	dict_table_t*	table)	/*!< in/out: table */
+{
+	ut_ad(mutex_own(&dict_sys->mutex));
+
+	ut_a(table->n_ref_count == 0);
+
+	dict_table_stats_lock(table, RW_X_LATCH);
+
+	/* Only initialized stats need resetting; either way the latch
+	is released on the single exit path at the bottom. */
+	if (table->stat_initialized) {
+		table->stat_initialized = FALSE;
+
+#ifdef UNIV_DEBUG_VALGRIND
+		/* UNIV_DEBUG_VALGRIND builds only: flag the table-level
+		and per-index stats members as invalid memory. */
+		UNIV_MEM_INVALID(&table->stat_n_rows,
+				 sizeof(table->stat_n_rows));
+		UNIV_MEM_INVALID(&table->stat_clustered_index_size,
+				 sizeof(table->stat_clustered_index_size));
+		UNIV_MEM_INVALID(&table->stat_sum_of_other_index_sizes,
+				 sizeof(table->stat_sum_of_other_index_sizes));
+		UNIV_MEM_INVALID(&table->stat_modified_counter,
+				 sizeof(table->stat_modified_counter));
+
+		for (dict_index_t* index = dict_table_get_first_index(table);
+		     index != NULL;
+		     index = dict_table_get_next_index(index)) {
+
+			ulint	n_uniq = dict_index_get_n_unique(index);
+
+			UNIV_MEM_INVALID(
+				index->stat_n_diff_key_vals,
+				n_uniq
+				* sizeof(index->stat_n_diff_key_vals[0]));
+			UNIV_MEM_INVALID(
+				index->stat_n_sample_sizes,
+				n_uniq
+				* sizeof(index->stat_n_sample_sizes[0]));
+			UNIV_MEM_INVALID(
+				index->stat_n_non_null_key_vals,
+				n_uniq
+				* sizeof(index->stat_n_non_null_key_vals[0]));
+			UNIV_MEM_INVALID(&index->stat_index_size,
+					 sizeof(index->stat_index_size));
+			UNIV_MEM_INVALID(&index->stat_n_leaf_pages,
+					 sizeof(index->stat_n_leaf_pages));
+		}
+#endif /* UNIV_DEBUG_VALGRIND */
+	}
+
+	dict_table_stats_unlock(table, RW_X_LATCH);
+}
diff --git a/storage/innobase/include/dict0stats_bg.h b/storage/innobase/include/dict0stats_bg.h
new file mode 100644
index 00000000000..e866ab419fe
--- /dev/null
+++ b/storage/innobase/include/dict0stats_bg.h
@@ -0,0 +1,127 @@
+/*****************************************************************************
+
+Copyright (c) 2012, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0stats_bg.h
+Code used for background table and index stats gathering.
+
+Created Apr 26, 2012 Vasil Dimov
+*******************************************************/
+
+#ifndef dict0stats_bg_h
+#define dict0stats_bg_h
+
+#include "univ.i"
+
+#include "dict0types.h" /* dict_table_t, table_id_t */
+#include "os0sync.h" /* os_event_t */
+#include "os0thread.h" /* DECLARE_THREAD */
+
+/** Event to wake up the stats thread */
+extern os_event_t	dict_stats_event;
+
+/*****************************************************************//**
+Add a table to the recalc pool, which is processed by the
+background stats gathering thread. Only the table id is added to the
+list, so the table can be closed after being enqueued and it will be
+opened when needed. If the table does not exist later (has been DROPped),
+then it will be removed from the pool and skipped. */
+UNIV_INTERN
+void
+dict_stats_recalc_pool_add(
+/*=======================*/
+	const dict_table_t*	table);	/*!< in: table to add */
+
+/*****************************************************************//**
+Delete a given table from the auto recalc pool.
+dict_stats_recalc_pool_del() */
+UNIV_INTERN
+void
+dict_stats_recalc_pool_del(
+/*=======================*/
+	const dict_table_t*	table);	/*!< in: table to remove */
+
+/** Yield the data dictionary latch while waiting for the background
+thread to stop accessing a table: unlock, os_thread_sleep(250000), relock.
+@param trx	transaction holding the data dictionary locks */
+#define DICT_STATS_BG_YIELD(trx)	do {	\
+	row_mysql_unlock_data_dictionary(trx);	\
+	os_thread_sleep(250000);		\
+	row_mysql_lock_data_dictionary(trx);	\
+} while (0)
+
+/*****************************************************************//**
+Request the background collection of statistics to stop for a table.
+@retval true when no background process is active
+@retval false when it is not safe to modify the table definition */
+UNIV_INLINE
+bool
+dict_stats_stop_bg(
+/*===============*/
+	dict_table_t*	table)	/*!< in/out: table */
+	__attribute__((warn_unused_result));
+
+/*****************************************************************//**
+Wait until background stats thread has stopped using the specified table.
+The caller must have locked the data dictionary using
+row_mysql_lock_data_dictionary() and this function may unlock it temporarily
+and restore the lock before it exits.
+The background stats thread is guaranteed not to start using the specified
+table after this function returns and before the caller unlocks the data
+dictionary because it sets the BG_STAT_IN_PROGRESS bit in table->stats_bg_flag
+under dict_sys->mutex. */
+UNIV_INTERN
+void
+dict_stats_wait_bg_to_stop_using_table(
+/*===================================*/
+	dict_table_t*	table,	/*!< in/out: table */
+	trx_t*		trx);	/*!< in/out: transaction to use for
+				unlocking/locking the data dict */
+/*****************************************************************//**
+Initialize global variables needed for the operation of dict_stats_thread().
+Must be called before dict_stats_thread() is started. */
+UNIV_INTERN
+void
+dict_stats_thread_init();
+/*====================*/
+
+/*****************************************************************//**
+Free resources allocated by dict_stats_thread_init(), must be called
+after dict_stats_thread() has exited. */
+UNIV_INTERN
+void
+dict_stats_thread_deinit();
+/*======================*/
+
+/*****************************************************************//**
+This is the thread for background stats gathering. It pops tables from
+the auto recalc list and processes them, eventually recalculating their
+statistics.
+@return this function does not return, it calls os_thread_exit() */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(dict_stats_thread)(
+/*==============================*/
+	void*	arg);	/*!< in: a dummy parameter
+			required by os_thread_create */
+
+# ifndef UNIV_NONINL
+#  include "dict0stats_bg.ic"
+# endif
+
+#endif /* dict0stats_bg_h */
diff --git a/storage/innobase/include/dict0stats_bg.ic b/storage/innobase/include/dict0stats_bg.ic
new file mode 100644
index 00000000000..87e3225de58
--- /dev/null
+++ b/storage/innobase/include/dict0stats_bg.ic
@@ -0,0 +1,45 @@
+/*****************************************************************************
+
+Copyright (c) 2012, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0stats_bg.ic
+Code used for background table and index stats gathering.
+
+Created Feb 8, 2013 Marko Makela
+*******************************************************/
+
+/*****************************************************************//**
+Ask the background statistics collection to stop working on a table.
+@retval true when no background process is active
+@retval false when it is not safe to modify the table definition */
+UNIV_INLINE
+bool
+dict_stats_stop_bg(
+/*===============*/
+	dict_table_t*	table)	/*!< in/out: table */
+{
+	ut_ad(!srv_read_only_mode);
+	ut_ad(mutex_own(&dict_sys->mutex));
+
+	if (table->stats_bg_flag & BG_STAT_IN_PROGRESS) {
+		/* still in progress: set the flag that asks it to quit */
+		table->stats_bg_flag |= BG_STAT_SHOULD_QUIT;
+		return(false);
+	}
+	return(true);
+}
diff --git a/storage/innobase/include/dict0types.h b/storage/innobase/include/dict0types.h
new file mode 100644
index 00000000000..d34b6f7eab3
--- /dev/null
+++ b/storage/innobase/include/dict0types.h
@@ -0,0 +1,91 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0types.h
+Data dictionary global types
+
+Created 1/8/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dict0types_h
+#define dict0types_h
+
+struct dict_sys_t;
+struct dict_col_t;
+struct dict_field_t;
+struct dict_index_t;
+struct dict_table_t;
+struct dict_foreign_t;
+
+struct ind_node_t;
+struct tab_node_t;
+
+/* Space id and page no where the dictionary header resides */
+#define	DICT_HDR_SPACE		0	/* the SYSTEM tablespace */
+#define	DICT_HDR_PAGE_NO	FSP_DICT_HDR_PAGE_NO
+
+/* The IDs of the ibuf table and its indexes are assigned as
+DICT_IBUF_ID_MIN plus the space id */
+#define DICT_IBUF_ID_MIN	0xFFFFFFFF00000000ULL
+
+typedef ib_id_t		table_id_t;
+typedef ib_id_t		index_id_t;
+
+/** Errors to ignore when loading a table definition into memory. The
+table and index will still be marked as "corrupted", and the caller is
+responsible for dealing with the corrupted table or index.
+Note: define the DICT_ERR_IGNORE_* values as distinct bits, so that
+they can be or-ed together */
+enum dict_err_ignore_t {
+	DICT_ERR_IGNORE_NONE = 0,	/*!< no error to ignore */
+	DICT_ERR_IGNORE_INDEX_ROOT = 1,	/*!< ignore error if index root
+					page is FIL_NULL or incorrect value */
+	DICT_ERR_IGNORE_CORRUPT = 2,	/*!< skip corrupted indexes */
+	DICT_ERR_IGNORE_FK_NOKEY = 4,	/*!< ignore error if any foreign
+					key is missing */
+	DICT_ERR_IGNORE_RECOVER_LOCK = 8,
+					/*!< Used when recovering table locks
+					for resurrected transactions.
+					Silently load a missing
+					tablespace, and do not load
+					incomplete index definitions. */
+	DICT_ERR_IGNORE_ALL = 0xFFFF	/*!< ignore all errors */
+};
+
+/** Quiescing states for flushing tables to disk. */
+enum ib_quiesce_t {
+	QUIESCE_NONE,			/*!< not quiescing (assumed; confirm) */
+	QUIESCE_START,			/*!< Initialise, prepare to start */
+	QUIESCE_COMPLETE		/*!< All done */
+};
+
+/** Prefix for tmp tables, adopted from sql/table.h */
+#define tmp_file_prefix		"#sql"
+#define tmp_file_prefix_length	4
+#define TEMP_FILE_PREFIX_INNODB	"#sql-ib"
+
+#define TEMP_TABLE_PREFIX                "#sql"
+#define TEMP_TABLE_PATH_PREFIX           "/" TEMP_TABLE_PREFIX
+
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+/** Flag to control insert buffer debugging. */
+extern uint		ibuf_debug;
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+
+#endif
diff --git a/storage/innobase/include/dyn0dyn.h b/storage/innobase/include/dyn0dyn.h
new file mode 100644
index 00000000000..7f23302d1ff
--- /dev/null
+++ b/storage/innobase/include/dyn0dyn.h
@@ -0,0 +1,199 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dyn0dyn.h
+The dynamically allocated array
+
+Created 2/5/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dyn0dyn_h
+#define dyn0dyn_h
+
+#include "univ.i"
+#include "ut0lst.h"
+#include "mem0mem.h"
+
+/** A block in a dynamically allocated array */
+struct dyn_block_t;
+/** Dynamically allocated array */
+typedef dyn_block_t		dyn_array_t;
+
+/** This is the initial 'payload' size of a dynamic array;
+this must be > MLOG_BUF_MARGIN + 30! */
+#define	DYN_ARRAY_DATA_SIZE	512
+
+/*********************************************************************//**
+Initializes a dynamic array.
+@return	initialized dyn array */
+UNIV_INLINE
+dyn_array_t*
+dyn_array_create(
+/*=============*/
+	dyn_array_t*	arr)	/*!< in/out: memory buffer of
+				size sizeof(dyn_array_t) */
+	__attribute__((nonnull));
+/************************************************************//**
+Frees a dynamic array. */
+UNIV_INLINE
+void
+dyn_array_free(
+/*===========*/
+	dyn_array_t*	arr)	/*!< in,own: dyn array */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Makes room on top of a dyn array and returns a pointer to a buffer in it.
+After copying the elements, the caller must close the buffer using
+dyn_array_close.
+@return	pointer to the buffer */
+UNIV_INLINE
+byte*
+dyn_array_open(
+/*===========*/
+	dyn_array_t*	arr,	/*!< in/out: dynamic array */
+	ulint		size)	/*!< in: size in bytes of the buffer; MUST
+				NOT exceed DYN_ARRAY_DATA_SIZE! */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Closes the buffer returned by dyn_array_open. */
+UNIV_INLINE
+void
+dyn_array_close(
+/*============*/
+	dyn_array_t*	arr,	/*!< in/out: dynamic array */
+	const byte*	ptr)	/*!< in: end of used space */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Makes room on top of a dyn array and returns a pointer to
+the added element. The caller must copy the element to
+the pointer returned.
+@return	pointer to the element */
+UNIV_INLINE
+void*
+dyn_array_push(
+/*===========*/
+	dyn_array_t*	arr,	/*!< in/out: dynamic array */
+	ulint		size)	/*!< in: size in bytes of the element */
+	__attribute__((nonnull, warn_unused_result));
+/************************************************************//**
+Returns pointer to an element in dyn array.
+@return	pointer to element */
+UNIV_INLINE
+void*
+dyn_array_get_element(
+/*==================*/
+	const dyn_array_t*	arr,	/*!< in: dyn array */
+	ulint			pos)	/*!< in: position of element
+					in bytes from array start */
+	__attribute__((nonnull, warn_unused_result));
+/************************************************************//**
+Returns the size of stored data in a dyn array.
+@return	data size in bytes */
+UNIV_INLINE
+ulint
+dyn_array_get_data_size(
+/*====================*/
+	const dyn_array_t*	arr)	/*!< in: dyn array */
+	__attribute__((nonnull, warn_unused_result, pure));
+/************************************************************//**
+Gets the first block in a dyn array.
+@param arr	dyn array
+@return		first block */
+#define dyn_array_get_first_block(arr) (arr)
+/************************************************************//**
+Gets the last block in a dyn array.
+@param arr	dyn array
+@return		last block */
+#define dyn_array_get_last_block(arr)				\
+	((arr)->heap ? UT_LIST_GET_LAST((arr)->base) : (arr))
+/********************************************************************//**
+Gets the next block in a dyn array.
+@param arr	dyn array
+@param block	dyn array block
+@return		pointer to next, NULL if end of list */
+#define dyn_array_get_next_block(arr, block)			\
+	((arr)->heap ? UT_LIST_GET_NEXT(list, block) : NULL)
+/********************************************************************//**
+Gets the previous block in a dyn array.
+@param arr	dyn array
+@param block	dyn array block
+@return		pointer to previous, NULL if start of list */
+#define dyn_array_get_prev_block(arr, block)			\
+	((arr)->heap ? UT_LIST_GET_PREV(list, block) : NULL)
+/********************************************************************//**
+Gets the number of used bytes in a dyn array block.
+@return	number of bytes used */
+UNIV_INLINE
+ulint
+dyn_block_get_used(
+/*===============*/
+	const dyn_block_t*	block)	/*!< in: dyn array block */
+	__attribute__((nonnull, warn_unused_result, pure));
+/********************************************************************//**
+Gets pointer to the start of data in a dyn array block.
+@return	pointer to data */
+UNIV_INLINE
+byte*
+dyn_block_get_data(
+/*===============*/
+	const dyn_block_t*	block)	/*!< in: dyn array block */
+	__attribute__((nonnull, warn_unused_result, pure));
+/********************************************************//**
+Pushes n bytes to a dyn array. */
+UNIV_INLINE
+void
+dyn_push_string(
+/*============*/
+	dyn_array_t*	arr,	/*!< in/out: dyn array */
+	const byte*	str,	/*!< in: string to write */
+	ulint		len)	/*!< in: string length */
+	__attribute__((nonnull));
+
+/*#################################################################*/
+
+/** @brief A block in a dynamically allocated array; the first block
+doubles as the array handle (dyn_array_t). NOTE! Do not access the fields
+directly: the definition is here only so the compiler knows the size! */
+struct dyn_block_t{
+	mem_heap_t*	heap;	/*!< in the first block this is != NULL
+				if dynamic allocation has been needed */
+	ulint		used;	/*!< number of data bytes used in this block;
+				DYN_BLOCK_FULL_FLAG is set when the block
+				becomes full */
+	byte		data[DYN_ARRAY_DATA_SIZE];
+				/*!< storage for array elements */
+	UT_LIST_BASE_NODE_T(dyn_block_t) base;
+				/*!< linear list of dyn blocks: this node is
+				used only in the first block */
+	UT_LIST_NODE_T(dyn_block_t) list;
+				/*!< linear list node: used in all blocks */
+#ifdef UNIV_DEBUG
+	ulint		buf_end;/*!< only in the debug version: if dyn
+				array is opened, this is the buffer
+				end offset, else this is 0 */
+	ulint		magic_n;/*!< magic number (DYN_BLOCK_MAGIC_N) */
+#endif
+};
+
+
+#ifndef UNIV_NONINL
+#include "dyn0dyn.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/dyn0dyn.ic b/storage/innobase/include/dyn0dyn.ic
new file mode 100644
index 00000000000..0296554e2ee
--- /dev/null
+++ b/storage/innobase/include/dyn0dyn.ic
@@ -0,0 +1,306 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dyn0dyn.ic
+The dynamically allocated array
+
+Created 2/5/1996 Heikki Tuuri
+*******************************************************/
+
+/** Value of dyn_block_t::magic_n */
+#define DYN_BLOCK_MAGIC_N	375767
+/** Flag for dyn_block_t::used that indicates a full block */
+#define DYN_BLOCK_FULL_FLAG	0x1000000UL
+
+/************************************************************//**
+Adds a new block to a dyn array.
+@return	created block */
+UNIV_INTERN
+dyn_block_t*
+dyn_array_add_block(
+/*================*/
+	dyn_array_t*	arr)	/*!< in/out: dyn array */
+	__attribute__((nonnull, warn_unused_result));
+
+/********************************************************************//**
+Reads the byte count of a dyn array block with the full flag removed.
+@return	block->used without the DYN_BLOCK_FULL_FLAG bit */
+UNIV_INLINE
+ulint
+dyn_block_get_used(
+/*===============*/
+	const dyn_block_t*	block)	/*!< in: dyn array block */
+{
+	ut_ad(block);
+	const ulint	used_bytes = block->used & ~DYN_BLOCK_FULL_FLAG;
+	return(used_bytes);
+}
+
+/********************************************************************//**
+Gives access to the payload area of a dyn array block.
+@return	pointer to the first byte of block->data, with const removed */
+UNIV_INLINE
+byte*
+dyn_block_get_data(
+/*===============*/
+	const dyn_block_t*	block)	/*!< in: dyn array block */
+{
+	ut_ad(block);
+	const byte*	first_byte = block->data;
+	return(const_cast<byte*>(first_byte));
+}
+
+/*********************************************************************//**
+Sets up the caller's buffer as an empty dynamic array without a heap.
+@return	arr, now initialized */
+UNIV_INLINE
+dyn_array_t*
+dyn_array_create(
+/*=============*/
+	dyn_array_t*	arr)	/*!< in/out: memory buffer of
+				size sizeof(dyn_array_t) */
+{
+#if DYN_ARRAY_DATA_SIZE >= DYN_BLOCK_FULL_FLAG
+# error "DYN_ARRAY_DATA_SIZE >= DYN_BLOCK_FULL_FLAG"
+#endif
+	ut_ad(arr);
+
+	/* Debug builds only: mark the block valid and not opened. */
+	ut_d(arr->magic_n = DYN_BLOCK_MAGIC_N);
+	ut_d(arr->buf_end = 0);
+
+	arr->used = 0;
+	arr->heap = NULL;
+	return(arr);
+}
+
+/************************************************************//**
+Frees the memory heap of a dynamic array, if it has one. */
+UNIV_INLINE
+void
+dyn_array_free(
+/*===========*/
+	dyn_array_t*	arr)	/*!< in: dyn array */
+{
+	if (mem_heap_t* extra_heap = arr->heap) {
+		mem_heap_free(extra_heap);
+	}
+
+	ut_d(arr->magic_n = 0);
+}
+
+/*********************************************************************//**
+Reserves size bytes at the end of a dyn array for one element. Nothing
+is copied here: the caller writes the element through the returned pointer.
+@return	pointer to the reserved bytes */
+UNIV_INLINE
+void*
+dyn_array_push(
+/*===========*/
+	dyn_array_t*	arr,	/*!< in/out: dynamic array */
+	ulint		size)	/*!< in: size in bytes of the element */
+{
+	ut_ad(arr);
+	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
+	ut_ad(size <= DYN_ARRAY_DATA_SIZE);
+	ut_ad(size);
+
+	dyn_block_t*	target = arr;
+
+	if (target->used + size > DYN_ARRAY_DATA_SIZE) {
+		/* The first block cannot take the element: try the last
+		block and, failing that, append a fresh block. */
+		target = dyn_array_get_last_block(arr);
+
+		if (target->used + size > DYN_ARRAY_DATA_SIZE) {
+			target = dyn_array_add_block(arr);
+		}
+	}
+
+	/* Reserve the bytes and remember where they start. */
+	const ulint	offset = target->used;
+
+	target->used = offset + size;
+	ut_ad(target->used <= DYN_ARRAY_DATA_SIZE);
+
+	byte*	element = target->data + offset;
+
+	return(element);
+}
+
+/*********************************************************************//**
+Finds a block with room for size more bytes, adding a block if needed, and
+returns a pointer to the free space. The used count is not advanced here:
+the caller must finish with dyn_array_close after filling the buffer.
+@return	pointer to the buffer */
+UNIV_INLINE
+byte*
+dyn_array_open(
+/*===========*/
+	dyn_array_t*	arr,	/*!< in/out: dynamic array */
+	ulint		size)	/*!< in: size in bytes of the buffer; must
+				not exceed DYN_ARRAY_DATA_SIZE! */
+{
+	ut_ad(arr);
+	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
+	ut_ad(size <= DYN_ARRAY_DATA_SIZE);
+	ut_ad(size);
+
+	dyn_block_t*	target = arr;
+
+	if (target->used + size > DYN_ARRAY_DATA_SIZE) {
+		/* The first block cannot take the buffer: fall back to
+		the last block, appending a new one if that fails too. */
+		target = dyn_array_get_last_block(arr);
+
+		if (target->used + size > DYN_ARRAY_DATA_SIZE) {
+			target = dyn_array_add_block(arr);
+			ut_a(size <= DYN_ARRAY_DATA_SIZE);
+		}
+	}
+
+	ut_ad(target->used <= DYN_ARRAY_DATA_SIZE);
+	ut_ad(arr->buf_end == 0);
+	/* Debug builds remember where the opened buffer ends. */
+	ut_d(arr->buf_end = target->used + size);
+
+	byte*	buf_start = target->data + target->used;
+	return(buf_start);
+}
+
+/*********************************************************************//**
+Ends a dyn_array_open: records how much of the buffer was really used. */
+UNIV_INLINE
+void
+dyn_array_close(
+/*============*/
+	dyn_array_t*	arr,	/*!< in/out: dynamic array */
+	const byte*	ptr)	/*!< in: end of used space */
+{
+	ut_ad(arr);
+	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
+
+	/* Work on the last block of the array. */
+	dyn_block_t*	tail = dyn_array_get_last_block(arr);
+	const byte*	payload = tail->data;
+
+	ut_ad(payload + arr->buf_end >= ptr);
+
+	/* Everything in the block before ptr now counts as used. */
+	tail->used = ptr - payload;
+
+	ut_ad(tail->used <= DYN_ARRAY_DATA_SIZE);
+	ut_d(arr->buf_end = 0);
+}
+
+/************************************************************//**
+Translates a byte position in a dyn array into a pointer.
+@return	pointer to the byte at position pos */
+UNIV_INLINE
+void*
+dyn_array_get_element(
+/*==================*/
+	const dyn_array_t*	arr,	/*!< in: dyn array */
+	ulint			pos)	/*!< in: position of element
+					in bytes from array start */
+{
+	ut_ad(arr);
+	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
+
+	const dyn_block_t*	cur = dyn_array_get_first_block(arr);
+
+	if (arr->heap != NULL) {
+		/* Several blocks may exist: skip every block that lies
+		entirely before pos, turning pos into a block offset. */
+		ulint	n_used = dyn_block_get_used(cur);
+
+		while (pos >= n_used) {
+			pos -= n_used;
+			cur = UT_LIST_GET_NEXT(list, cur);
+			ut_ad(cur);
+
+			n_used = dyn_block_get_used(cur);
+		}
+	}
+
+	ut_ad(cur);
+	ut_ad(dyn_block_get_used(cur) >= pos);
+
+	/* pos is by now an offset into the data of cur. */
+	byte*	payload = const_cast<byte*>(cur->data);
+
+	return(payload + pos);
+}
+
+/************************************************************//**
+Adds up the bytes stored in all blocks of a dyn array.
+@return	data size in bytes */
+UNIV_INLINE
+ulint
+dyn_array_get_data_size(
+/*====================*/
+	const dyn_array_t*	arr)	/*!< in: dyn array */
+{
+	ut_ad(arr);
+	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
+
+	if (arr->heap == NULL) {
+		/* No heap, hence a single block: report its used field. */
+		return(arr->used);
+	}
+
+	ulint	total = 0;
+
+	/* Walk the block list from the first block, adding up the
+	used byte counts with the full flag masked off. */
+	for (const dyn_block_t* cur = dyn_array_get_first_block(arr);
+	     cur != NULL;
+	     cur = dyn_array_get_next_block(arr, cur)) {
+
+		total += dyn_block_get_used(cur);
+	}
+
+	return(total);
+}
+
+/********************************************************//**
+Appends len bytes starting at str to a dyn array. */
+UNIV_INLINE
+void
+dyn_push_string(
+/*============*/
+	dyn_array_t*	arr,	/*!< in/out: dyn array */
+	const byte*	str,	/*!< in: string to write */
+	ulint		len)	/*!< in: string length */
+{
+	/* One dyn_array_push() call takes at most DYN_ARRAY_DATA_SIZE
+	bytes, so longer strings are appended piece by piece. */
+	const ulint	max_chunk = DYN_ARRAY_DATA_SIZE;
+
+	while (len > 0) {
+		/* Only the final piece can be shorter than max_chunk. */
+		const ulint	chunk = len > max_chunk ? max_chunk : len;
+		void*		dest = dyn_array_push(arr, chunk);
+
+		memcpy(dest, str, chunk);
+
+		str += chunk;
+		len -= chunk;
+	}
+}
diff --git a/storage/innobase/include/eval0eval.h b/storage/innobase/include/eval0eval.h
new file mode 100644
index 00000000000..e3b1e6c16b6
--- /dev/null
+++ b/storage/innobase/include/eval0eval.h
@@ -0,0 +1,114 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/eval0eval.h
+SQL evaluator: evaluates simple data structures, like expressions, in
+a query graph
+
+Created 12/29/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef eval0eval_h
+#define eval0eval_h
+
+#include "univ.i"
+#include "que0types.h"
+#include "pars0sym.h"
+#include "pars0pars.h"
+
+/*****************************************************************//**
+Free the buffer from global dynamic memory for a value of a que_node,
+if it has been allocated by eval_node_alloc_val_buf(). The freeing for
+pushed column values is done in sel_col_prefetch_buf_free. */
+UNIV_INTERN
+void
+eval_node_free_val_buf(
+/*===================*/
+	que_node_t*	node);	/*!< in: query graph node */
+/*****************************************************************//**
+Evaluates a symbol table symbol. */
+UNIV_INLINE
+void
+eval_sym(
+/*=====*/
+	sym_node_t*	sym_node);	/*!< in: symbol table node */
+/*****************************************************************//**
+Evaluates an expression. */
+UNIV_INLINE
+void
+eval_exp(
+/*=====*/
+	que_node_t*	exp_node);	/*!< in: expression */
+/*****************************************************************//**
+Sets an integer value as the value of an expression node. */
+UNIV_INLINE
+void
+eval_node_set_int_val(
+/*==================*/
+	que_node_t*	node,	/*!< in: expression node */
+	lint		val);	/*!< in: value to set */
+/*****************************************************************//**
+Gets an integer value from an expression node.
+@return	integer value */
+UNIV_INLINE
+lint
+eval_node_get_int_val(
+/*==================*/
+	que_node_t*	node);	/*!< in: expression node */
+/*****************************************************************//**
+Copies a binary string value as the value of a query graph node. Allocates a
+new buffer if necessary. */
+UNIV_INLINE
+void
+eval_node_copy_and_alloc_val(
+/*=========================*/
+	que_node_t*	node,	/*!< in: query graph node */
+	const byte*	str,	/*!< in: binary string */
+	ulint		len);	/*!< in: string length or UNIV_SQL_NULL */
+/*****************************************************************//**
+Copies a query node value to another node. */
+UNIV_INLINE
+void
+eval_node_copy_val(
+/*===============*/
+	que_node_t*	node1,	/*!< in: node to copy to */
+	que_node_t*	node2);	/*!< in: node to copy from */
+/*****************************************************************//**
+Gets an ibool value from a query node.
+@return	ibool value */
+UNIV_INLINE
+ibool
+eval_node_get_ibool_val(
+/*====================*/
+	que_node_t*	node);	/*!< in: query graph node */
+/*****************************************************************//**
+Evaluates a comparison node.
+@return	the result of the comparison */
+UNIV_INTERN
+ibool
+eval_cmp(
+/*=====*/
+	func_node_t*	cmp_node);	/*!< in: comparison node */
+
+
+#ifndef UNIV_NONINL
+#include "eval0eval.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/eval0eval.ic b/storage/innobase/include/eval0eval.ic
new file mode 100644
index 00000000000..e4b1dd08017
--- /dev/null
+++ b/storage/innobase/include/eval0eval.ic
@@ -0,0 +1,255 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/eval0eval.ic
+SQL evaluator: evaluates simple data structures, like expressions, in
+a query graph
+
+Created 12/29/1997 Heikki Tuuri
+*******************************************************/
+
+#include "que0que.h"
+#include "rem0cmp.h"
+#include "pars0grm.h"
+
+/*****************************************************************//**
+Evaluates a function node. */
+UNIV_INTERN
+void
+eval_func(
+/*======*/
+	func_node_t*	func_node);	/*!< in: function node */
+/*****************************************************************//**
+Allocate a buffer from global dynamic memory for a value of a que_node.
+NOTE that this memory must be explicitly freed when the query graph is
+freed. If the node already has allocated buffer, that buffer is freed
+here. NOTE that this is the only function where dynamic memory should be
+allocated for a query node val field.
+@return	pointer to allocated buffer */
+UNIV_INTERN
+byte*
+eval_node_alloc_val_buf(
+/*====================*/
+	que_node_t*	node,	/*!< in: query graph node; sets the val field
+				data field to point to the new buffer, and
+				len field equal to size */
+	ulint		size);	/*!< in: buffer size */
+
+
+/*****************************************************************//**
+Makes sure that a node has a value buffer of at least size bytes.
+@return	pointer to the buffer, either the existing or a new one */
+UNIV_INLINE
+byte*
+eval_node_ensure_val_buf(
+/*=====================*/
+	que_node_t*	node,	/*!< in: query graph node; sets the val field
+				data field to point to the new buffer, and
+				len field equal to size */
+	ulint		size)	/*!< in: buffer size */
+{
+	dfield_t*	val = que_node_get_val(node);
+
+	dfield_set_len(val, size);
+
+	byte*		buf = static_cast<byte*>(dfield_get_data(val));
+
+	/* Keep the current buffer when it exists and is big enough. */
+	if (buf != NULL && que_node_get_val_buf_size(node) >= size) {
+
+		return(buf);
+	}
+
+	/* Otherwise get a buffer of the requested size. */
+	return(eval_node_alloc_val_buf(node, size));
+}
+
+/*****************************************************************//**
+Evaluates a symbol table symbol: an alias gets the data of its target. */
+UNIV_INLINE
+void
+eval_sym(
+/*=====*/
+	sym_node_t*	sym_node)	/*!< in: symbol table node */
+{
+	ut_ad(que_node_get_type(sym_node) == QUE_NODE_SYMBOL);
+
+	if (!sym_node->indirection) {
+		/* Not an alias: there is nothing to copy. */
+		return;
+	}
+
+	/* The node aliases a variable or a column: take over its data. */
+	dfield_copy_data(que_node_get_val(sym_node),
+			 que_node_get_val(sym_node->indirection));
+}
+
+/*****************************************************************//**
+Evaluates an expression, which is either a symbol or a function node. */
+UNIV_INLINE
+void
+eval_exp(
+/*=====*/
+	que_node_t*	exp_node)	/*!< in: expression */
+{
+	/* Symbols are evaluated in place; any other node is treated as
+	a function node and handed over to eval_func(). */
+
+	if (que_node_get_type(exp_node) != QUE_NODE_SYMBOL) {
+		eval_func(static_cast<func_node_t*>(exp_node));
+	} else {
+		eval_sym(static_cast<sym_node_t*>(exp_node));
+	}
+}
+
+/*****************************************************************//**
+Stores an integer in 4 bytes as the value of an expression node. */
+UNIV_INLINE
+void
+eval_node_set_int_val(
+/*==================*/
+	que_node_t*	node,	/*!< in: expression node */
+	lint		val)	/*!< in: value to set */
+{
+	dfield_t*	val_field = que_node_get_val(node);
+	byte*		buf = static_cast<byte*>(dfield_get_data(val_field));
+
+	if (!buf) {
+		/* No value buffer yet: allocate the 4 bytes needed. */
+		buf = eval_node_alloc_val_buf(node, 4);
+	}
+
+	ut_ad(dfield_get_len(val_field) == 4);
+
+	/* The value goes through ulint on its way into the buffer. */
+	const ulint	raw = (ulint) val;
+
+	mach_write_to_4(buf, raw);
+}
+
+/*****************************************************************//**
+Reads the 4-byte integer value (not SQL NULL) of an expression node.
+@return	integer value */
+UNIV_INLINE
+lint
+eval_node_get_int_val(
+/*==================*/
+	que_node_t*	node)	/*!< in: expression node */
+{
+	dfield_t*	val_field = que_node_get_val(node);
+	const byte*	raw = static_cast<byte*>(dfield_get_data(val_field));
+
+	ut_ad(dfield_get_len(val_field) == 4);
+
+	/* Read 4 bytes and narrow to int before widening to lint. */
+	const int	as_int = (int) mach_read_from_4(raw);
+
+	return(as_int);
+}
+
+/*****************************************************************//**
+Reads the ibool value held in the first data byte of a query node.
+@return	ibool value */
+UNIV_INLINE
+ibool
+eval_node_get_ibool_val(
+/*====================*/
+	que_node_t*	node)	/*!< in: query graph node */
+{
+	dfield_t*	val_field = que_node_get_val(node);
+	byte*		flag_byte;
+
+	flag_byte = static_cast<byte*>(dfield_get_data(val_field));
+
+	/* A NULL data pointer is not expected here. */
+	ut_ad(flag_byte != NULL);
+
+	const ibool	result = mach_read_from_1(flag_byte);
+	return(result);
+}
+
+/*****************************************************************//**
+Stores an ibool in one byte as the value of a function node. */
+UNIV_INLINE
+void
+eval_node_set_ibool_val(
+/*====================*/
+	func_node_t*	func_node,	/*!< in: function node */
+	ibool		val)		/*!< in: value to set */
+{
+	dfield_t*	val_field = que_node_get_val(func_node);
+	byte*		buf;
+
+	buf = static_cast<byte*>(dfield_get_data(val_field));
+
+	if (!buf) {
+		/* No value buffer yet: a single byte is enough to
+		hold the value. */
+		buf = eval_node_alloc_val_buf(func_node, 1);
+	}
+
+	/* Old or new, the length of the value must be one byte. */
+	ut_ad(dfield_get_len(val_field) == 1);
+
+	/* Store the flag in that byte. */
+	mach_write_to_1(buf, val);
+}
+
+/*****************************************************************//**
+Makes a binary string the value of a query graph node by copying it into
+the value buffer of the node; a new buffer is allocated if necessary. */
+UNIV_INLINE
+void
+eval_node_copy_and_alloc_val(
+/*=========================*/
+	que_node_t*	node,	/*!< in: query graph node */
+	const byte*	str,	/*!< in: binary string */
+	ulint		len)	/*!< in: string length or UNIV_SQL_NULL */
+{
+	if (len != UNIV_SQL_NULL) {
+		/* A real value: make sure the node has room for len
+		bytes, then copy the string into the buffer. */
+		byte*	dest = eval_node_ensure_val_buf(node, len);
+
+		ut_memcpy(dest, str, len);
+	} else {
+		/* SQL NULL: only the length is recorded, nothing is
+		copied. */
+		dfield_set_len(que_node_get_val(node), len);
+	}
+}
+
+/*****************************************************************//**
+Copies the value of one query node into another query node. */
+UNIV_INLINE
+void
+eval_node_copy_val(
+/*===============*/
+	que_node_t*	node1,	/*!< in: node to copy to */
+	que_node_t*	node2)	/*!< in: node to copy from */
+{
+	dfield_t*	src = que_node_get_val(node2);
+
+	/* Read the source first, then let the callee handle SQL NULL
+	and any buffer allocation for node1. */
+	const byte*	src_data = static_cast<byte*>(dfield_get_data(src));
+	const ulint	src_len = dfield_get_len(src);
+
+	eval_node_copy_and_alloc_val(node1, src_data, src_len);
+}
diff --git a/storage/innobase/include/eval0proc.h b/storage/innobase/include/eval0proc.h
new file mode 100644
index 00000000000..7755fb10343
--- /dev/null
+++ b/storage/innobase/include/eval0proc.h
@@ -0,0 +1,104 @@
+/*****************************************************************************
+
+Copyright (c) 1998, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/eval0proc.h
+Executes SQL stored procedures and their control structures
+
+Created 1/20/1998 Heikki Tuuri
+*******************************************************/
+
+#ifndef eval0proc_h
+#define eval0proc_h
+
+#include "univ.i"
+#include "que0types.h"
+#include "pars0sym.h"
+#include "pars0pars.h"
+
+/**********************************************************************//**
+Performs an execution step of a procedure node.
+@return	query thread to run next or NULL */
+UNIV_INLINE
+que_thr_t*
+proc_step(
+/*======*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/**********************************************************************//**
+Performs an execution step of an if-statement node.
+@return	query thread to run next or NULL */
+UNIV_INTERN
+que_thr_t*
+if_step(
+/*====*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/**********************************************************************//**
+Performs an execution step of a while-statement node.
+@return	query thread to run next or NULL */
+UNIV_INTERN
+que_thr_t*
+while_step(
+/*=======*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/**********************************************************************//**
+Performs an execution step of a for-loop node.
+@return	query thread to run next or NULL */
+UNIV_INTERN
+que_thr_t*
+for_step(
+/*=====*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/**********************************************************************//**
+Performs an execution step of an assignment statement node.
+@return	query thread to run next or NULL */
+UNIV_INTERN
+que_thr_t*
+assign_step(
+/*========*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/**********************************************************************//**
+Performs an execution step of a procedure call node.
+@return	query thread to run next or NULL */
+UNIV_INLINE
+que_thr_t*
+proc_eval_step(
+/*===========*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/**********************************************************************//**
+Performs an execution step of an exit statement node.
+@return	query thread to run next or NULL */
+UNIV_INTERN
+que_thr_t*
+exit_step(
+/*======*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/**********************************************************************//**
+Performs an execution step of a return-statement node.
+@return	query thread to run next or NULL */
+UNIV_INTERN
+que_thr_t*
+return_step(
+/*========*/
+	que_thr_t*	thr);	/*!< in: query thread */
+
+
+#ifndef UNIV_NONINL
+#include "eval0proc.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/eval0proc.ic b/storage/innobase/include/eval0proc.ic
new file mode 100644
index 00000000000..81418bae2c9
--- /dev/null
+++ b/storage/innobase/include/eval0proc.ic
@@ -0,0 +1,88 @@
+/*****************************************************************************
+
+Copyright (c) 1998, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/eval0proc.ic
+Executes SQL stored procedures and their control structures
+
+Created 1/20/1998 Heikki Tuuri
+*******************************************************/
+
+#include "pars0pars.h"
+#include "que0que.h"
+#include "eval0eval.h"
+
+/**********************************************************************//**
+Performs an execution step of a procedure node (enters or leaves its body).
+@return	query thread to run next; this implementation always returns thr */
+UNIV_INLINE
+que_thr_t*
+proc_step(
+/*======*/
+	que_thr_t*	thr)	/*!< in: query thread */
+{
+	ut_ad(thr);
+
+	proc_node_t*	proc = static_cast<proc_node_t*>(thr->run_node);
+	ut_ad(que_node_get_type(proc) == QUE_NODE_PROC);
+
+	if (thr->prev_node != que_node_get_parent(proc)) {
+		/* Not entered from the parent: the previous node is
+		expected to be the last statement, so go back up */
+		ut_ad(que_node_get_next(thr->prev_node) == NULL);
+
+		thr->run_node = que_node_get_parent(proc);
+
+		return(thr);
+	}
+
+	/* Entered from the parent: run the first statement of the list,
+	or return to the parent at once when the list is empty */
+	thr->run_node = proc->stat_list;
+
+	if (thr->run_node == NULL) {
+		thr->run_node = que_node_get_parent(proc);
+	}
+
+	return(thr);
+}
+
+/**********************************************************************//**
+Performs an execution step of a procedure call node: the call expression
+is evaluated and control is handed back to the parent node.
+@return	query thread to run next; this implementation always returns thr */
+UNIV_INLINE
+que_thr_t*
+proc_eval_step(
+/*===========*/
+	que_thr_t*	thr)	/*!< in: query thread */
+{
+	ut_ad(thr);
+
+	func_node_t*	call = static_cast<func_node_t*>(thr->run_node);
+	ut_ad(que_node_get_type(call) == QUE_NODE_FUNC);
+
+	/* The call is represented as a function node; evaluate it */
+
+	eval_exp(call);
+
+	/* Continue from the parent node */
+	thr->run_node = que_node_get_parent(call);
+
+	return(thr);
+}
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
new file mode 100644
index 00000000000..168f2f5b594
--- /dev/null
+++ b/storage/innobase/include/fil0fil.h
@@ -0,0 +1,1019 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/fil0fil.h
+The low-level file system
+
+Created 10/25/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef fil0fil_h
+#define fil0fil_h
+
+#include "univ.i"
+
+#ifndef UNIV_INNOCHECKSUM
+
+#include "dict0types.h"
+#include "ut0byte.h"
+#include "os0file.h"
+#ifndef UNIV_HOTBACKUP
+#include "sync0rw.h"
+#include "ibuf0types.h"
+#include "log0log.h"
+#endif /* !UNIV_HOTBACKUP */
+
+#include <list>
+
+extern my_bool lower_case_file_system;
+// Forward declaration
+struct trx_t;
+struct fil_space_t;
+
+typedef std::list<const char*> space_name_list_t;
+
+/** When mysqld is run, the default directory "." is the mysqld datadir,
+but in the MySQL Embedded Server Library and mysqlbackup it is not the default
+directory, and we must set the base file path explicitly */
+extern const char*	fil_path_to_mysql_datadir;
+
+/** Initial size of a single-table tablespace in pages */
+#define FIL_IBD_FILE_INITIAL_SIZE	4
+
+/** 'null' (undefined) page offset in the context of file spaces */
+#define	FIL_NULL	ULINT32_UNDEFINED
+
+/* Space address data type; this is intended to be used when
+addresses accurate to a byte are stored in file pages. If the page part
+of the address is FIL_NULL, the address is considered undefined. */
+
+typedef	byte	fil_faddr_t;	/*!< 'type' definition in C: an address
+				stored in a file page is a string of bytes */
+#define FIL_ADDR_PAGE	0	/* first in address is the page offset */
+#define	FIL_ADDR_BYTE	4	/* then comes 2-byte byte offset within page*/
+
+#define	FIL_ADDR_SIZE	6	/* address size is 6 bytes */
+
+/** File space address: a page number plus a byte offset within that page */
+struct fil_addr_t{
+	ulint	page;		/*!< page number within a space */
+	ulint	boffset;	/*!< byte offset within the page */
+};
+
+/** The null file address */
+extern fil_addr_t	fil_addr_null;
+
+#endif /* !UNIV_INNOCHECKSUM */
+
+/** The byte offsets on a file page for various variables @{ */
+#define FIL_PAGE_SPACE_OR_CHKSUM 0	/*!< in < MySQL-4.0.14 space id the
+					page belongs to (== 0) but in later
+					versions the 'new' checksum of the
+					page */
+#define FIL_PAGE_OFFSET		4	/*!< page offset inside space */
+#define FIL_PAGE_PREV		8	/*!< if there is a 'natural'
+					predecessor of the page, its
+					offset.  Otherwise FIL_NULL.
+					This field is not set on BLOB
+					pages, which are stored as a
+					singly-linked list.  See also
+					FIL_PAGE_NEXT. */
+#define FIL_PAGE_NEXT		12	/*!< if there is a 'natural' successor
+					of the page, its offset.
+					Otherwise FIL_NULL.
+					B-tree index pages
+					(FIL_PAGE_TYPE contains FIL_PAGE_INDEX)
+					on the same PAGE_LEVEL are maintained
+					as a doubly linked list via
+					FIL_PAGE_PREV and FIL_PAGE_NEXT
+					in the collation order of the
+					smallest user record on each page. */
+#define FIL_PAGE_LSN		16	/*!< lsn of the end of the newest
+					modification log record to the page */
+#define	FIL_PAGE_TYPE		24	/*!< file page type: FIL_PAGE_INDEX,...,
+					2 bytes.
+
+					The contents of this field can only
+					be trusted in the following case:
+					if the page is an uncompressed
+					B-tree index page, then it is
+					guaranteed that the value is
+					FIL_PAGE_INDEX.
+					The opposite does not hold.
+
+					In tablespaces created by
+					MySQL/InnoDB 5.1.7 or later, the
+					contents of this field is valid
+					for all uncompressed pages. */
+#define FIL_PAGE_FILE_FLUSH_LSN	26	/*!< this is only defined for the
+					first page in a system tablespace
+					data file (ibdata*, not *.ibd):
+					the file has been flushed to disk
+					at least up to this lsn */
+#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID  34 /*!< starting from 4.1.x this
+					contains the space id of the page */
+#define FIL_PAGE_SPACE_ID  FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID
+
+#define FIL_PAGE_DATA		38	/*!< start of the data on the page */
+/* @} */
+/** File page trailer @{ */
+#define FIL_PAGE_END_LSN_OLD_CHKSUM 8	/*!< the low 4 bytes of this are used
+					to store the page checksum, the
+					last 4 bytes should be identical
+					to the last 4 bytes of FIL_PAGE_LSN */
+#define FIL_PAGE_DATA_END	8	/*!< size of the page trailer */
+/* @} */
+
+#ifndef UNIV_INNOCHECKSUM
+
+/** File page types (values of FIL_PAGE_TYPE) @{ */
+#define FIL_PAGE_INDEX		17855	/*!< B-tree node */
+#define FIL_PAGE_UNDO_LOG	2	/*!< Undo log page */
+#define FIL_PAGE_INODE		3	/*!< Index node */
+#define FIL_PAGE_IBUF_FREE_LIST	4	/*!< Insert buffer free list */
+/* File page types introduced in MySQL/InnoDB 5.1.7 */
+#define FIL_PAGE_TYPE_ALLOCATED	0	/*!< Freshly allocated page */
+#define FIL_PAGE_IBUF_BITMAP	5	/*!< Insert buffer bitmap */
+#define FIL_PAGE_TYPE_SYS	6	/*!< System page */
+#define FIL_PAGE_TYPE_TRX_SYS	7	/*!< Transaction system data */
+#define FIL_PAGE_TYPE_FSP_HDR	8	/*!< File space header */
+#define FIL_PAGE_TYPE_XDES	9	/*!< Extent descriptor page */
+#define FIL_PAGE_TYPE_BLOB	10	/*!< Uncompressed BLOB page */
+#define FIL_PAGE_TYPE_ZBLOB	11	/*!< First compressed BLOB page */
+#define FIL_PAGE_TYPE_ZBLOB2	12	/*!< Subsequent compressed BLOB page */
+#define FIL_PAGE_TYPE_LAST	FIL_PAGE_TYPE_ZBLOB2
+					/*!< Last page type */
+/* @} */
+
+/** Space types @{ */
+#define FIL_TABLESPACE		501	/*!< tablespace */
+#define FIL_LOG			502	/*!< redo log */
+/* @} */
+
+/** The number of fsyncs done to the log */
+extern ulint	fil_n_log_flushes;
+
+/** Number of pending redo log flushes */
+extern ulint	fil_n_pending_log_flushes;
+/** Number of pending tablespace flushes */
+extern ulint	fil_n_pending_tablespace_flushes;
+
+/** Number of files currently open */
+extern ulint	fil_n_file_opened;
+/** Bookkeeping for one attempt to open a tablespace file */
+struct fsp_open_info {
+	ibool		success;	/*!< Has the tablespace been opened? */
+	const char*	check_msg;	/*!< fil_check_first_page() message */
+	ibool		valid;		/*!< Is the tablespace valid? */
+	os_file_t	file;		/*!< File handle */
+	char*		filepath;	/*!< File path to open */
+	lsn_t		lsn;		/*!< Flushed LSN from header page */
+	ulint		id;		/*!< Space ID */
+	ulint		flags;		/*!< Tablespace flags */
+#ifdef UNIV_LOG_ARCHIVE
+	ulint		arch_log_no;	/*!< latest archived log file number */
+#endif /* UNIV_LOG_ARCHIVE */
+};
+
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
+Returns the version number of a tablespace, -1 if not found.
+@return version number, -1 if the tablespace does not exist in the
+memory cache */
+UNIV_INTERN
+ib_int64_t
+fil_space_get_version(
+/*==================*/
+	ulint	id);	/*!< in: space id */
+/*******************************************************************//**
+Returns the latch of a file space.
+@return	latch protecting storage allocation */
+UNIV_INTERN
+rw_lock_t*
+fil_space_get_latch(
+/*================*/
+	ulint	id,	/*!< in: space id */
+	ulint*	zip_size);/*!< out: compressed page size, or
+			0 for uncompressed tablespaces */
+/*******************************************************************//**
+Returns the type of a file space.
+@return	FIL_TABLESPACE or FIL_LOG */
+UNIV_INTERN
+ulint
+fil_space_get_type(
+/*===============*/
+	ulint	id);	/*!< in: space id */
+#endif /* !UNIV_HOTBACKUP */
+/*******************************************************************//**
+Appends a new file to the chain of files of a space. File must be closed.
+@return pointer to the file name, or NULL on error */
+UNIV_INTERN
+char*
+fil_node_create(
+/*============*/
+	const char*	name,	/*!< in: file name (file must be closed) */
+	ulint		size,	/*!< in: file size in database blocks, rounded
+				downwards to an integer */
+	ulint		id,	/*!< in: space id where to append */
+	ibool		is_raw)	/*!< in: TRUE if a raw device or
+				a raw disk partition */
+	__attribute__((nonnull, warn_unused_result));
+#ifdef UNIV_LOG_ARCHIVE
+/****************************************************************//**
+Drops files from the start of a file space, so that its size is cut by
+the amount given. */
+UNIV_INTERN
+void
+fil_space_truncate_start(
+/*=====================*/
+	ulint	id,		/*!< in: space id */
+	ulint	trunc_len);	/*!< in: truncate by this much; it is an error
+				if this does not equal the combined size of
+				some initial files in the space */
+#endif /* UNIV_LOG_ARCHIVE */
+/*******************************************************************//**
+Creates a space memory object and puts it to the 'fil system' hash table.
+If there is an error, prints an error message to the .err log.
+@return	TRUE if success */
+UNIV_INTERN
+ibool
+fil_space_create(
+/*=============*/
+	const char*	name,	/*!< in: space name */
+	ulint		id,	/*!< in: space id */
+	ulint		zip_size,/*!< in: compressed page size, or
+				0 for uncompressed tablespaces */
+	ulint		purpose);/*!< in: FIL_TABLESPACE, or FIL_LOG if log */
+/*******************************************************************//**
+Assigns a new space id for a new single-table tablespace. This works simply by
+incrementing the global counter. If 4 billion ids are not enough, we may need
+to recycle ids.
+@return	TRUE if assigned, FALSE if not */
+UNIV_INTERN
+ibool
+fil_assign_new_space_id(
+/*====================*/
+	ulint*	space_id);	/*!< in/out: space id */
+/*******************************************************************//**
+Returns the path from the first fil_node_t found for the space ID sent.
+The caller is responsible for freeing the memory allocated here for the
+value returned.
+@return	a copy of fil_node_t::path, NULL if space is zero or not found. */
+UNIV_INTERN
+char*
+fil_space_get_first_path(
+/*=====================*/
+	ulint	id);	/*!< in: space id */
+/*******************************************************************//**
+Returns the size of the space in pages. The tablespace must be cached in the
+memory cache.
+@return	space size, 0 if space not found */
+UNIV_INTERN
+ulint
+fil_space_get_size(
+/*===============*/
+	ulint	id);	/*!< in: space id */
+/*******************************************************************//**
+Returns the flags of the space. The tablespace must be cached
+in the memory cache.
+@return	flags, ULINT_UNDEFINED if space not found */
+UNIV_INTERN
+ulint
+fil_space_get_flags(
+/*================*/
+	ulint	id);	/*!< in: space id */
+/*******************************************************************//**
+Returns the compressed page size of the space, or 0 if the space
+is not compressed. The tablespace must be cached in the memory cache.
+@return	compressed page size, ULINT_UNDEFINED if space not found */
+UNIV_INTERN
+ulint
+fil_space_get_zip_size(
+/*===================*/
+	ulint	id);	/*!< in: space id */
+/*******************************************************************//**
+Checks if the pair space, page_no refers to an existing page in a tablespace
+file space. The tablespace must be cached in the memory cache.
+@return	TRUE if the address is meaningful */
+UNIV_INTERN
+ibool
+fil_check_adress_in_tablespace(
+/*===========================*/
+	ulint	id,	/*!< in: space id */
+	ulint	page_no);/*!< in: page number */
+/****************************************************************//**
+Initializes the tablespace memory cache. */
+UNIV_INTERN
+void
+fil_init(
+/*=====*/
+	ulint	hash_size,	/*!< in: hash table size */
+	ulint	max_n_open);	/*!< in: max number of open files */
+/*******************************************************************//**
+Closes the tablespace memory cache; presumably the inverse of fil_init(). */
+UNIV_INTERN
+void
+fil_close(void);
+/*===========*/
+/*******************************************************************//**
+Opens all log files and system tablespace data files. They stay open until the
+database server shutdown. This should be called at a server startup after the
+space objects for the log and the system tablespace have been created. The
+purpose of this operation is to make sure we never run out of file descriptors
+if we need to read from the insert buffer or to write to the log. */
+UNIV_INTERN
+void
+fil_open_log_and_system_tablespace_files(void);
+/*==========================================*/
+/*******************************************************************//**
+Closes all open files. There must not be any pending i/o's or not flushed
+modifications in the files. */
+UNIV_INTERN
+void
+fil_close_all_files(void);
+/*=====================*/
+/*******************************************************************//**
+Closes the redo log files. There must not be any pending i/o's or not
+flushed modifications in the files. */
+UNIV_INTERN
+void
+fil_close_log_files(
+/*================*/
+	bool	free);	/*!< in: whether to free the memory object */
+/*******************************************************************//**
+Sets the max tablespace id counter if the given number is bigger than the
+previous value. */
+UNIV_INTERN
+void
+fil_set_max_space_id_if_bigger(
+/*===========================*/
+	ulint	max_id);/*!< in: maximum known id */
+#ifndef UNIV_HOTBACKUP
+/****************************************************************//**
+Writes the flushed lsn and the latest archived log number to the page
+header of the first page of each data file in the system tablespace.
+@return	DB_SUCCESS or error number */
+UNIV_INTERN
+dberr_t
+fil_write_flushed_lsn_to_data_files(
+/*================================*/
+	lsn_t	lsn,		/*!< in: lsn to write */
+	ulint	arch_log_no);	/*!< in: latest archived log file number */
+/*******************************************************************//**
+Reads the flushed lsn, arch no, and tablespace flag fields from a data
+file at database startup.
+@retval NULL on success, or if innodb_force_recovery is set
+@return pointer to an error message string */
+UNIV_INTERN
+const char*
+fil_read_first_page(
+/*================*/
+	os_file_t	data_file,		/*!< in: open data file */
+	ibool		one_read_already,	/*!< in: TRUE if min and max
+						parameters below already
+						contain sensible data */
+	ulint*		flags,			/*!< out: tablespace flags */
+	ulint*		space_id,		/*!< out: tablespace ID */
+#ifdef UNIV_LOG_ARCHIVE
+	ulint*		min_arch_log_no,	/*!< out: min of archived
+						log numbers in data files */
+	ulint*		max_arch_log_no,	/*!< out: max of archived
+						log numbers in data files */
+#endif /* UNIV_LOG_ARCHIVE */
+	lsn_t*		min_flushed_lsn,	/*!< out: min of flushed
+						lsn values in data files */
+	lsn_t*		max_flushed_lsn)	/*!< out: max of flushed
+						lsn values in data files */
+	__attribute__((warn_unused_result));
+/*******************************************************************//**
+Increments the count of pending operations, if space is not being deleted.
+@return	TRUE if being deleted, and operation should be skipped */
+UNIV_INTERN
+ibool
+fil_inc_pending_ops(
+/*================*/
+	ulint	id,		/*!< in: space id */
+	ibool	print_err);	/*!< in: need to print error or not */
+/*******************************************************************//**
+Decrements the count of pending operations. */
+UNIV_INTERN
+void
+fil_decr_pending_ops(
+/*=================*/
+	ulint	id);	/*!< in: space id */
+#endif /* !UNIV_HOTBACKUP */
+/*******************************************************************//**
+Parses the body of a log record written about an .ibd file operation. That is,
+the log record part after the standard (type, space id, page no) header of the
+log record.
+
+If desired, also replays the delete or rename operation if the .ibd file
+exists and the space id in it matches. Replays the create operation if a file
+at that path does not exist yet. If the database directory for the file to be
+created does not exist, then we create the directory, too.
+
+Note that mysqlbackup --apply-log sets fil_path_to_mysql_datadir to point to
+the datadir that we should use in replaying the file operations.
+@return end of log record, or NULL if the record was not completely
+contained between ptr and end_ptr */
+UNIV_INTERN
+byte*
+fil_op_log_parse_or_replay(
+/*=======================*/
+	byte*	ptr,		/*!< in: buffer containing the log record body,
+				or an initial segment of it, if the record does
+				not fit completely between ptr and end_ptr */
+	byte*	end_ptr,	/*!< in: buffer end */
+	ulint	type,		/*!< in: the type of this log record */
+	ulint	space_id,	/*!< in: the space id of the tablespace in
+				question, or 0 if the log record should
+				only be parsed but not replayed */
+	ulint	log_flags);	/*!< in: redo log flags
+				(stored in the page number parameter) */
+/*******************************************************************//**
+Deletes a single-table tablespace. The tablespace must be cached in the
+memory cache.
+@return	DB_SUCCESS or error */
+UNIV_INTERN
+dberr_t
+fil_delete_tablespace(
+/*==================*/
+	ulint		id,		/*!< in: space id */
+	buf_remove_t	buf_remove);	/*!< in: specify the action to take
+					on the tables pages in the buffer
+					pool */
+/*******************************************************************//**
+Closes a single-table tablespace. The tablespace must be cached in the
+memory cache. Free all pages used by the tablespace.
+@return	DB_SUCCESS or error */
+UNIV_INTERN
+dberr_t
+fil_close_tablespace(
+/*=================*/
+	trx_t*	trx,	/*!< in/out: Transaction covering the close */
+	ulint	id);	/*!< in: space id */
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
+Discards a single-table tablespace. The tablespace must be cached in the
+memory cache. Discarding is like deleting a tablespace, but
+
+ 1. We do not drop the table from the data dictionary;
+
+ 2. We remove all insert buffer entries for the tablespace immediately;
+    in DROP TABLE they are only removed gradually in the background;
+
+ 3. When the user does IMPORT TABLESPACE, the tablespace will have the
+    same id as it originally had.
+
+ 4. Free all the pages in use by the tablespace if rename=TRUE.
+@return	DB_SUCCESS or error */
+UNIV_INTERN
+dberr_t
+fil_discard_tablespace(
+/*===================*/
+	ulint	id)	/*!< in: space id */
+	__attribute__((warn_unused_result));
+#endif /* !UNIV_HOTBACKUP */
+/*******************************************************************//**
+Renames a single-table tablespace. The tablespace must be cached in the
+tablespace memory cache.
+@return	TRUE if success */
+UNIV_INTERN
+ibool
+fil_rename_tablespace(
+/*==================*/
+	const char*	old_name_in,	/*!< in: old table name in the
+					standard databasename/tablename
+					format of InnoDB, or NULL if we
+					do the rename based on the space
+					id only */
+	ulint		id,		/*!< in: space id */
+	const char*	new_name,	/*!< in: new table name in the
+					standard databasename/tablename
+					format of InnoDB */
+	const char*	new_path);	/*!< in: new full datafile path
+					if the tablespace is remotely
+					located, or NULL if it is located
+					in the normal data directory. */
+
+/*******************************************************************//**
+Allocates a file name for a single-table tablespace. The string must be freed
+by caller with mem_free().
+@return	own: file name */
+UNIV_INTERN
+char*
+fil_make_ibd_name(
+/*==============*/
+	const char*	name,		/*!< in: table name or a dir path */
+	bool		is_full_path);	/*!< in: TRUE if it is a dir path */
+/*******************************************************************//**
+Allocates a file name for a tablespace ISL file (InnoDB Symbolic Link).
+The string must be freed by caller with mem_free().
+@return	own: file name */
+UNIV_INTERN
+char*
+fil_make_isl_name(
+/*==============*/
+	const char*	name);	/*!< in: table name */
+/*******************************************************************//**
+Creates a new InnoDB Symbolic Link (ISL) file.  It is always created
+under the 'datadir' of MySQL. The datadir is the directory of a
+running mysqld program. We can refer to it by simply using the path '.'.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fil_create_link_file(
+/*=================*/
+	const char*	tablename,	/*!< in: tablename */
+	const char*	filepath);	/*!< in: pathname of tablespace */
+/*******************************************************************//**
+Deletes an InnoDB Symbolic Link (ISL) file. */
+UNIV_INTERN
+void
+fil_delete_link_file(
+/*==================*/
+	const char*	tablename);	/*!< in: name of table */
+/*******************************************************************//**
+Reads an InnoDB Symbolic Link (ISL) file.
+It is always created under the 'datadir' of MySQL.  The name is of the
+form {databasename}/{tablename}. and the isl file is expected to be in a
+'{databasename}' directory called '{tablename}.isl'. The caller must free
+the memory of the null-terminated path returned if it is not null.
+@return	own: filepath found in link file, NULL if not found. */
+UNIV_INTERN
+char*
+fil_read_link_file(
+/*===============*/
+	const char*	name);		/*!< in: tablespace name */
+/*******************************************************************//**
+Creates a new single-table tablespace to a database directory of MySQL.
+Database directories are under the 'datadir' of MySQL. The datadir is the
+directory of a running mysqld program, so we can refer to it as '.'. Tables
+created with CREATE TEMPORARY TABLE are placed in the temp dir of mysqld.
+Only tablename is declared nonnull, because dir_path is allowed to be NULL.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fil_create_new_single_table_tablespace(
+/*===================================*/
+	ulint		space_id,	/*!< in: space id */
+	const char*	tablename,	/*!< in: the table name in the usual
+					databasename/tablename format
+					of InnoDB */
+	const char*	dir_path,	/*!< in: NULL or a dir path */
+	ulint		flags,		/*!< in: tablespace flags */
+	ulint		flags2,		/*!< in: table flags2 */
+	ulint		size)		/*!< in: the initial size of the
+					tablespace file in pages,
+					must be >= FIL_IBD_FILE_INITIAL_SIZE */
+	__attribute__((nonnull(2), warn_unused_result));
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+Tries to open a single-table tablespace and optionally checks the space id is
+right in it. If does not succeed, prints an error message to the .err log. This
+function is used to open a tablespace when we start up mysqld, and also in
+IMPORT TABLESPACE.
+NOTE that we assume this operation is used either at the database startup
+or under the protection of the dictionary mutex, so that two users cannot
+race here. This operation does not leave the file associated with the
+tablespace open, but closes it after we have looked at the space id in it.
+
+If the validate boolean is set, we read the first page of the file and
+check that the space id in the file is what we expect. We assume that
+this function runs much faster if no check is made, since accessing the
+file inode probably is much faster (the OS caches them) than accessing
+the first page of the file.  This boolean may be initially FALSE, but if
+a remote tablespace is found it will be changed to true.
+
+If the fix_dict boolean is set, then it is safe to use an internal SQL
+statement to update the dictionary tables if they are incorrect.
+
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fil_open_single_table_tablespace(
+/*=============================*/
+	bool		validate,	/*!< in: Do we validate tablespace? */
+	bool		fix_dict,	/*!< in: Can we fix the dictionary? */
+	ulint		id,		/*!< in: space id */
+	ulint		flags,		/*!< in: tablespace flags */
+	const char*	tablename,	/*!< in: table name in the
+					databasename/tablename format */
+	const char*	filepath)	/*!< in: tablespace filepath */
+	__attribute__((nonnull(5), warn_unused_result));
+
+#endif /* !UNIV_HOTBACKUP */
+/********************************************************************//**
+At the server startup, if we need crash recovery, scans the database
+directories under the MySQL datadir, looking for .ibd files. Those files are
+single-table tablespaces. We need to know the space id in each of them so that
+we know into which file we should look to check the contents of a page stored
+in the doublewrite buffer, also to know where to apply log records where the
+space id is != 0.
+@return	DB_SUCCESS or error number */
+UNIV_INTERN
+dberr_t
+fil_load_single_table_tablespaces(void);
+/*===================================*/
+/*******************************************************************//**
+Returns TRUE if a single-table tablespace does not exist in the memory cache,
+or is being deleted there.
+@return	TRUE if does not exist or is being deleted */
+UNIV_INTERN
+ibool
+fil_tablespace_deleted_or_being_deleted_in_mem(
+/*===========================================*/
+	ulint		id,	/*!< in: space id */
+	ib_int64_t	version);/*!< in: tablespace_version should be this; if
+				you pass -1 as the value of this, then this
+				parameter is ignored */
+/*******************************************************************//**
+Returns TRUE if a single-table tablespace exists in the memory cache.
+@return	TRUE if exists */
+UNIV_INTERN
+ibool
+fil_tablespace_exists_in_mem(
+/*=========================*/
+	ulint	id);	/*!< in: space id */
+#ifndef UNIV_HOTBACKUP
+/*******************************************************************//**
+Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory
+cache. Note that if we have not done a crash recovery at the database startup,
+there may be many tablespaces which are not yet in the memory cache.
+@return	TRUE if a matching tablespace exists in the memory cache */
+UNIV_INTERN
+ibool
+fil_space_for_table_exists_in_mem(
+/*==============================*/
+	ulint		id,		/*!< in: space id */
+	const char*	name,		/*!< in: table name in the standard
+					'databasename/tablename' format */
+	ibool		mark_space,	/*!< in: in crash recovery, at database
+					startup we mark all spaces which have
+					an associated table in the InnoDB
+					data dictionary, so that
+					we can print a warning about orphaned
+					tablespaces */
+	ibool		print_error_if_does_not_exist,
+					/*!< in: print detailed error
+					information to the .err log if a
+					matching tablespace is not found from
+					memory */
+	bool		adjust_space,	/*!< in: whether to adjust space id
+					when find table space mismatch */
+	mem_heap_t*	heap,		/*!< in: heap memory */
+	table_id_t	table_id);	/*!< in: table id */
+#else /* !UNIV_HOTBACKUP */
+/********************************************************************//**
+Extends all tablespaces to the size stored in the space header. During the
+mysqlbackup --apply-log phase we extended the spaces on-demand so that log
+records could be applied, but that may have left spaces still too small
+compared to the size stored in the space header. */
+UNIV_INTERN
+void
+fil_extend_tablespaces_to_stored_len(void);
+/*======================================*/
+#endif /* !UNIV_HOTBACKUP */
+/**********************************************************************//**
+Tries to extend a data file so that it would accommodate the number of pages
+given. The tablespace must be cached in the memory cache. If the space is big
+enough already, does nothing.
+@return	TRUE if success */
+UNIV_INTERN
+ibool
+fil_extend_space_to_desired_size(
+/*=============================*/
+	ulint*	actual_size,	/*!< out: size of the space after extension;
+				if we ran out of disk space this may be lower
+				than the desired size */
+	ulint	space_id,	/*!< in: space id */
+	ulint	size_after_extend);/*!< in: desired size in pages after the
+				extension; if the current space size is bigger
+				than this already, the function does nothing */
+/*******************************************************************//**
+Tries to reserve free extents in a file space.
+@return	TRUE if succeed */
+UNIV_INTERN
+ibool
+fil_space_reserve_free_extents(
+/*===========================*/
+	ulint	id,		/*!< in: space id */
+	ulint	n_free_now,	/*!< in: number of free extents now */
+	ulint	n_to_reserve);	/*!< in: how many one wants to reserve */
+/*******************************************************************//**
+Releases free extents in a file space. */
+UNIV_INTERN
+void
+fil_space_release_free_extents(
+/*===========================*/
+	ulint	id,		/*!< in: space id */
+	ulint	n_reserved);	/*!< in: how many one reserved */
+/*******************************************************************//**
+Gets the number of reserved extents. If the database is silent, this number
+should be zero. */
+UNIV_INTERN
+ulint
+fil_space_get_n_reserved_extents(
+/*=============================*/
+	ulint	id);		/*!< in: space id */
+/********************************************************************//**
+Reads or writes data. Asynchronous (aio) unless sync requests otherwise.
+@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
+i/o on a tablespace which does not exist */
+UNIV_INTERN
+dberr_t
+fil_io(
+/*===*/
+	ulint	type,		/*!< in: OS_FILE_READ or OS_FILE_WRITE,
+				ORed to OS_FILE_LOG, if a log i/o
+				and ORed to OS_AIO_SIMULATED_WAKE_LATER
+				if simulated aio and we want to post a
+				batch of i/os; NOTE that a simulated batch
+				may introduce hidden chances of deadlocks,
+				because i/os are not actually handled until
+				all have been posted: use with great
+				caution! */
+	bool	sync,		/*!< in: true if synchronous aio is desired */
+	ulint	space_id,	/*!< in: space id */
+	ulint	zip_size,	/*!< in: compressed page size in bytes;
+				0 for uncompressed pages */
+	ulint	block_offset,	/*!< in: offset in number of blocks */
+	ulint	byte_offset,	/*!< in: remainder of offset in bytes; in
+				aio this must be divisible by the OS block
+				size */
+	ulint	len,		/*!< in: how many bytes to read or write; this
+				must not cross a file boundary; in aio this
+				must be a block size multiple */
+	void*	buf,		/*!< in/out: buffer where to store read data
+				or from where to write; must not be NULL
+				(nonnull(8)); in aio must be aligned */
+	void*	message)	/*!< in: message for aio handler if non-sync
+				aio used, else ignored */
+	__attribute__((nonnull(8)));
+/**********************************************************************//**
+Waits for an aio operation to complete. This function is used to write the
+handler for completed requests. The aio array of pending requests is divided
+into segments (see os0file.cc for more info). The thread specifies which
+segment it wants to wait for. */
+UNIV_INTERN
+void
+fil_aio_wait(
+/*=========*/
+	ulint	segment);	/*!< in: the number of the segment in the aio
+				array to wait for */
+/**********************************************************************//**
+Flushes to disk possible writes cached by the OS. If the space does not exist
+or is being dropped, does not do anything. */
+UNIV_INTERN
+void
+fil_flush(
+/*======*/
+	ulint	space_id);	/*!< in: file space id (this can be a group of
+				log files or a tablespace of the database) */
+/**********************************************************************//**
+Flushes to disk writes in file spaces of the given type possibly cached by
+the OS. */
+UNIV_INTERN
+void
+fil_flush_file_spaces(
+/*==================*/
+	ulint	purpose);	/*!< in: FIL_TABLESPACE, FIL_LOG */
+/******************************************************************//**
+Checks the consistency of the tablespace cache.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+fil_validate(void);
+/*==============*/
+/********************************************************************//**
+Returns TRUE if file address is undefined.
+@return	TRUE if undefined */
+UNIV_INTERN
+ibool
+fil_addr_is_null(
+/*=============*/
+	fil_addr_t	addr);	/*!< in: address */
+/********************************************************************//**
+Get the predecessor of a file page.
+@return	FIL_PAGE_PREV */
+UNIV_INTERN
+ulint
+fil_page_get_prev(
+/*==============*/
+	const byte*	page);	/*!< in: file page */
+/********************************************************************//**
+Get the successor of a file page.
+@return	FIL_PAGE_NEXT */
+UNIV_INTERN
+ulint
+fil_page_get_next(
+/*==============*/
+	const byte*	page);	/*!< in: file page */
+/*********************************************************************//**
+Sets the file page type. */
+UNIV_INTERN
+void
+fil_page_set_type(
+/*==============*/
+	byte*	page,	/*!< in/out: file page */
+	ulint	type);	/*!< in: type */
+/*********************************************************************//**
+Gets the file page type.
+@return type; NOTE that if the type has not been written to page, the
+return value is not defined */
+UNIV_INTERN
+ulint
+fil_page_get_type(
+/*==============*/
+	const byte*	page);	/*!< in: file page */
+
+/*******************************************************************//**
+Returns TRUE if a single-table tablespace is being deleted.
+@return TRUE if being deleted */
+UNIV_INTERN
+ibool
+fil_tablespace_is_being_deleted(
+/*============================*/
+	ulint		id);	/*!< in: space id */
+
+/********************************************************************//**
+Delete the tablespace file and any related files like .cfg.
+This should not be called for temporary tables. */
+UNIV_INTERN
+void
+fil_delete_file(
+/*============*/
+	const char*	path);	/*!< in: filepath of the ibd tablespace */
+
+/** Callback functor applied to the pages of a tablespace file. */
+struct PageCallback {
+
+	/** Default constructor: value-initializes every data member, including
+	m_file, so that none is indeterminate before set_file() is called. */
+	PageCallback() UNIV_NOTHROW
+		:
+		m_zip_size(), m_page_size(),
+		m_file(),
+		m_filepath() {}
+
+	virtual ~PageCallback() UNIV_NOTHROW {}
+
+	/**
+	Called for page 0 in the tablespace file at the start.
+	@param file_size - size of the file in bytes
+	@param block - contents of the first page in the tablespace file
+	@return DB_SUCCESS or error code */
+	virtual dberr_t init(
+		os_offset_t		file_size,
+		const buf_block_t*	block) UNIV_NOTHROW = 0;
+
+	/**
+	Called for every page in the tablespace. If the page was not
+	updated then its state must be set to BUF_PAGE_NOT_USED. For
+	compressed tables the page descriptor memory will be at offset:
+		block->frame + UNIV_PAGE_SIZE;
+	@param offset - physical offset within the file
+	@param block - block read from file, note it is not from the buffer pool
+	@return DB_SUCCESS or error code */
+	virtual dberr_t operator()(
+		os_offset_t	offset,
+		buf_block_t*	block) UNIV_NOTHROW = 0;
+
+	/**
+	Set the physical file name and the handle of the file being iterated
+	over. Only the filename pointer is stored; the string is not copied.
+	@param filename - the physical name of the tablespace file.
+	@param file - OS file handle */
+	void set_file(const char* filename, os_file_t file) UNIV_NOTHROW
+	{
+		m_file = file;
+		m_filepath = filename;
+	}
+
+	/**
+	@return the space id of the tablespace */
+	virtual ulint get_space_id() const UNIV_NOTHROW = 0;
+
+	/** The compressed page size
+	@return the compressed page size */
+	ulint get_zip_size() const
+	{
+		return(m_zip_size);
+	}
+
+	/**
+	Set the tablespace compressed table size.
+	@return DB_SUCCESS if it is valid or DB_CORRUPTION if not */
+	dberr_t set_zip_size(const buf_frame_t* page) UNIV_NOTHROW;
+
+	/** The tablespace page size
+	@return the tablespace page size */
+	ulint get_page_size() const
+	{
+		return(m_page_size);
+	}
+
+	/** Compressed table page size */
+	ulint			m_zip_size;
+
+	/** The tablespace page size. */
+	ulint			m_page_size;
+
+	/** File handle to the tablespace */
+	os_file_t		m_file;
+
+	/** Physical file path; the pointer given to set_file(), not a copy. */
+	const char*		m_filepath;
+
+protected:
+	// Disable copying (declarations only)
+	PageCallback(const PageCallback&);
+	PageCallback& operator=(const PageCallback&);
+};
+
+/********************************************************************//**
+Iterate over all the pages in the tablespace.
+@param table - the table definition in the server
+@param n_io_buffers - number of blocks to read and write together
+@param callback - functor that will do the page updates
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fil_tablespace_iterate(
+/*===================*/
+	dict_table_t*		table,
+	ulint			n_io_buffers,
+	PageCallback&		callback)
+	__attribute__((nonnull, warn_unused_result));
+
+/*******************************************************************//**
+Checks if a single-table tablespace for a given table name exists in the
+tablespace memory cache.
+@return	space id, ULINT_UNDEFINED if not found */
+UNIV_INTERN
+ulint
+fil_get_space_id_for_table(
+/*=======================*/
+	const char*	name);	/*!< in: table name in the standard
+				'databasename/tablename' format */
+
+/**
+Iterate over all the spaces in the space list and fetch the
+tablespace names. It will return a copy of the name that must be
+freed by the caller using: delete[].
+@return DB_SUCCESS if all OK. */
+UNIV_INTERN
+dberr_t
+fil_get_space_names(
+/*================*/
+	space_name_list_t&	space_name_list)
+				/*!< in/out: Vector for collecting the names. */
+	__attribute__((warn_unused_result));
+
+/****************************************************************//**
+Generate redo logs for swapping two .ibd files */
+UNIV_INTERN
+void
+fil_mtr_rename_log(
+/*===============*/
+	ulint		old_space_id,	/*!< in: tablespace id of the old
+					table. */
+	const char*	old_name,	/*!< in: old table name */
+	ulint		new_space_id,	/*!< in: tablespace id of the new
+					table */
+	const char*	new_name,	/*!< in: new table name */
+	const char*	tmp_name,	/*!< in: temp table name used while
+					swapping */
+	mtr_t*		mtr)		/*!< in/out: mini-transaction */
+	__attribute__((nonnull));
+
+/*******************************************************************//**
+Finds the given page_no of the given space id from the double write buffer,
+and copies it to the corresponding .ibd file.
+@return true if copy was successful, or false. */
+bool
+fil_user_tablespace_restore_page(
+/*==============================*/
+	fsp_open_info*	fsp,		/*!< in: contains space id and .ibd
+					file information */
+	ulint		page_no);	/*!< in: page_no to obtain from double
+					write buffer */
+
+#endif /* !UNIV_INNOCHECKSUM */
+#endif /* fil0fil_h */
diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h
new file mode 100644
index 00000000000..a587ccc9f20
--- /dev/null
+++ b/storage/innobase/include/fsp0fsp.h
@@ -0,0 +1,747 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/fsp0fsp.h
+File space management
+
+Created 12/18/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef fsp0fsp_h
+#define fsp0fsp_h
+
+#include "univ.i"
+
+#ifndef UNIV_INNOCHECKSUM
+
+#include "mtr0mtr.h"
+#include "fut0lst.h"
+#include "ut0byte.h"
+#include "page0types.h"
+#include "fsp0types.h"
+
+#endif /* !UNIV_INNOCHECKSUM */
+
+/* @defgroup fsp_flags InnoDB Tablespace Flag Constants @{ */
+
+/** Width of the POST_ANTELOPE flag */
+#define FSP_FLAGS_WIDTH_POST_ANTELOPE	1
+/** Number of flag bits used to indicate the tablespace zip page size */
+#define FSP_FLAGS_WIDTH_ZIP_SSIZE	4
+/** Width of the ATOMIC_BLOBS flag.  The ability to break up a long
+column into an in-record prefix and an externally stored part is available
+to the two Barracuda row formats COMPRESSED and DYNAMIC. */
+#define FSP_FLAGS_WIDTH_ATOMIC_BLOBS	1
+/** Number of flag bits used to indicate the tablespace page size */
+#define FSP_FLAGS_WIDTH_PAGE_SSIZE	4
+/** Width of the DATA_DIR flag.  This flag indicates that the tablespace
+is found in a remote location, not the default data directory. */
+#define FSP_FLAGS_WIDTH_DATA_DIR	1
+/** Width of all the currently known tablespace flags */
+#define FSP_FLAGS_WIDTH		(FSP_FLAGS_WIDTH_POST_ANTELOPE	\
+				+ FSP_FLAGS_WIDTH_ZIP_SSIZE	\
+				+ FSP_FLAGS_WIDTH_ATOMIC_BLOBS	\
+				+ FSP_FLAGS_WIDTH_PAGE_SSIZE	\
+				+ FSP_FLAGS_WIDTH_DATA_DIR)
+
+/** A mask of all the known/used bits in tablespace flags */
+#define FSP_FLAGS_MASK		(~(~0 << FSP_FLAGS_WIDTH))
+
+/** Zero relative shift position of the POST_ANTELOPE field */
+#define FSP_FLAGS_POS_POST_ANTELOPE	0
+/** Zero relative shift position of the ZIP_SSIZE field */
+#define FSP_FLAGS_POS_ZIP_SSIZE		(FSP_FLAGS_POS_POST_ANTELOPE	\
+					+ FSP_FLAGS_WIDTH_POST_ANTELOPE)
+/** Zero relative shift position of the ATOMIC_BLOBS field */
+#define FSP_FLAGS_POS_ATOMIC_BLOBS	(FSP_FLAGS_POS_ZIP_SSIZE	\
+					+ FSP_FLAGS_WIDTH_ZIP_SSIZE)
+/** Zero relative shift position of the PAGE_SSIZE field */
+#define FSP_FLAGS_POS_PAGE_SSIZE	(FSP_FLAGS_POS_ATOMIC_BLOBS	\
+					+ FSP_FLAGS_WIDTH_ATOMIC_BLOBS)
+/** Zero relative shift position of the DATA_DIR field */
+#define FSP_FLAGS_POS_DATA_DIR		(FSP_FLAGS_POS_PAGE_SSIZE	\
+					+ FSP_FLAGS_WIDTH_PAGE_SSIZE)
+/** Zero relative shift position of the start of the UNUSED bits */
+#define FSP_FLAGS_POS_UNUSED		(FSP_FLAGS_POS_DATA_DIR	\
+					+ FSP_FLAGS_WIDTH_DATA_DIR)
+
+/** Bit mask of the POST_ANTELOPE field */
+#define FSP_FLAGS_MASK_POST_ANTELOPE				\
+		((~(~0 << FSP_FLAGS_WIDTH_POST_ANTELOPE))	\
+		<< FSP_FLAGS_POS_POST_ANTELOPE)
+/** Bit mask of the ZIP_SSIZE field */
+#define FSP_FLAGS_MASK_ZIP_SSIZE				\
+		((~(~0 << FSP_FLAGS_WIDTH_ZIP_SSIZE))		\
+		<< FSP_FLAGS_POS_ZIP_SSIZE)
+/** Bit mask of the ATOMIC_BLOBS field */
+#define FSP_FLAGS_MASK_ATOMIC_BLOBS				\
+		((~(~0 << FSP_FLAGS_WIDTH_ATOMIC_BLOBS))	\
+		<< FSP_FLAGS_POS_ATOMIC_BLOBS)
+/** Bit mask of the PAGE_SSIZE field */
+#define FSP_FLAGS_MASK_PAGE_SSIZE				\
+		((~(~0 << FSP_FLAGS_WIDTH_PAGE_SSIZE))		\
+		<< FSP_FLAGS_POS_PAGE_SSIZE)
+/** Bit mask of the DATA_DIR field */
+#define FSP_FLAGS_MASK_DATA_DIR					\
+		((~(~0 << FSP_FLAGS_WIDTH_DATA_DIR))		\
+		<< FSP_FLAGS_POS_DATA_DIR)
+
+/** Return the value of the POST_ANTELOPE field */
+#define FSP_FLAGS_GET_POST_ANTELOPE(flags)			\
+		(((flags) & FSP_FLAGS_MASK_POST_ANTELOPE)	\
+		>> FSP_FLAGS_POS_POST_ANTELOPE)
+/** Return the value of the ZIP_SSIZE field */
+#define FSP_FLAGS_GET_ZIP_SSIZE(flags)				\
+		(((flags) & FSP_FLAGS_MASK_ZIP_SSIZE)		\
+		>> FSP_FLAGS_POS_ZIP_SSIZE)
+/** Return the value of the ATOMIC_BLOBS field */
+#define FSP_FLAGS_HAS_ATOMIC_BLOBS(flags)			\
+		(((flags) & FSP_FLAGS_MASK_ATOMIC_BLOBS)	\
+		>> FSP_FLAGS_POS_ATOMIC_BLOBS)
+/** Return the value of the PAGE_SSIZE field */
+#define FSP_FLAGS_GET_PAGE_SSIZE(flags)				\
+		(((flags) & FSP_FLAGS_MASK_PAGE_SSIZE)		\
+		>> FSP_FLAGS_POS_PAGE_SSIZE)
+/** Return the value of the DATA_DIR field */
+#define FSP_FLAGS_HAS_DATA_DIR(flags)				\
+		(((flags) & FSP_FLAGS_MASK_DATA_DIR)		\
+		>> FSP_FLAGS_POS_DATA_DIR)
+/** Return the contents of the UNUSED bits */
+#define FSP_FLAGS_GET_UNUSED(flags)				\
+		((flags) >> FSP_FLAGS_POS_UNUSED)
+
+/** Return the given tablespace flags with ssize ORed into the PAGE_SSIZE
+bits; flags itself is not modified and existing bits are not cleared. */
+#define FSP_FLAGS_SET_PAGE_SSIZE(flags, ssize)			\
+		((flags) | ((ssize) << FSP_FLAGS_POS_PAGE_SSIZE))
+
+/* @} */
+
+/* @defgroup Tablespace Header Constants (moved from fsp0fsp.c) @{ */
+
+/** Offset of the space header within a file page */
+#define FSP_HEADER_OFFSET	FIL_PAGE_DATA
+
+/* The data structures in files are defined just as byte strings in C */
+typedef	byte	fsp_header_t;
+typedef	byte	xdes_t;
+
+/*			SPACE HEADER
+			============
+
+File space header data structure: this data structure is contained in the
+first page of a space. The space for this header is reserved in every extent
+descriptor page, but used only in the first. */
+
+/*-------------------------------------*/
+#define FSP_SPACE_ID		0	/* space id */
+#define FSP_NOT_USED		4	/* this field contained a value up to
+					which we know that the modifications
+					in the database have been flushed to
+					the file space; not used now */
+#define	FSP_SIZE		8	/* Current size of the space in
+					pages */
+#define	FSP_FREE_LIMIT		12	/* Minimum page number for which the
+					free list has not been initialized:
+					the pages >= this limit are, by
+					definition, free; note that in a
+					single-table tablespace where size
+					< 64 pages, this number is 64, i.e.,
+					we have initialized the space
+					about the first extent, but have not
+					physically allocated those pages to the
+					file */
+#define	FSP_SPACE_FLAGS		16	/* fsp_space_t.flags, similar to
+					dict_table_t::flags */
+#define	FSP_FRAG_N_USED		20	/* number of used pages in the
+					FSP_FREE_FRAG list */
+#define	FSP_FREE		24	/* list of free extents */
+#define	FSP_FREE_FRAG		(24 + FLST_BASE_NODE_SIZE)
+					/* list of partially free extents not
+					belonging to any segment */
+#define	FSP_FULL_FRAG		(24 + 2 * FLST_BASE_NODE_SIZE)
+					/* list of full extents not belonging
+					to any segment */
+#define FSP_SEG_ID		(24 + 3 * FLST_BASE_NODE_SIZE)
+					/* 8 bytes which give the first unused
+					segment id */
+#define FSP_SEG_INODES_FULL	(32 + 3 * FLST_BASE_NODE_SIZE)
+					/* list of pages containing segment
+					headers, where all the segment inode
+					slots are reserved */
+#define FSP_SEG_INODES_FREE	(32 + 4 * FLST_BASE_NODE_SIZE)
+					/* list of pages containing segment
+					headers, where not all the segment
+					header slots are reserved */
+/*-------------------------------------*/
+/* File space header size */
+#define	FSP_HEADER_SIZE		(32 + 5 * FLST_BASE_NODE_SIZE)
+
+#define	FSP_FREE_ADD		4	/* this many free extents are added
+					to the free list from above
+					FSP_FREE_LIMIT at a time */
+/* @} */
+
+#ifndef UNIV_INNOCHECKSUM
+
+/* @defgroup File Segment Inode Constants (moved from fsp0fsp.c) @{ */
+
+/*			FILE SEGMENT INODE
+			==================
+
+Segment inode which is created for each segment in a tablespace. NOTE: in
+purge we assume that a segment having only one currently used page can be
+freed in a few steps, so that the freeing cannot fill the file buffer with
+bufferfixed file pages. */
+
+typedef	byte	fseg_inode_t;
+
+#define FSEG_INODE_PAGE_NODE	FSEG_PAGE_DATA
+					/* the list node for linking
+					segment inode pages */
+
+#define FSEG_ARR_OFFSET		(FSEG_PAGE_DATA + FLST_NODE_SIZE)
+/*-------------------------------------*/
+#define	FSEG_ID			0	/* 8 bytes of segment id: if this is 0,
+					it means that the header is unused */
+#define FSEG_NOT_FULL_N_USED	8
+					/* number of used segment pages in
+					the FSEG_NOT_FULL list */
+#define	FSEG_FREE		12
+					/* list of free extents of this
+					segment */
+#define	FSEG_NOT_FULL		(12 + FLST_BASE_NODE_SIZE)
+					/* list of partially free extents */
+#define	FSEG_FULL		(12 + 2 * FLST_BASE_NODE_SIZE)
+					/* list of full extents */
+#define	FSEG_MAGIC_N		(12 + 3 * FLST_BASE_NODE_SIZE)
+					/* magic number used in debugging */
+#define	FSEG_FRAG_ARR		(16 + 3 * FLST_BASE_NODE_SIZE)
+					/* array of individual pages
+					belonging to this segment in fsp
+					fragment extent lists */
+#define FSEG_FRAG_ARR_N_SLOTS	(FSP_EXTENT_SIZE / 2)
+					/* number of slots in the array for
+					the fragment pages */
+#define	FSEG_FRAG_SLOT_SIZE	4	/* a fragment page slot contains its
+					page number within space, FIL_NULL
+					means that the slot is not in use */
+/*-------------------------------------*/
+#define FSEG_INODE_SIZE					\
+	(16 + 3 * FLST_BASE_NODE_SIZE			\
+	 + FSEG_FRAG_ARR_N_SLOTS * FSEG_FRAG_SLOT_SIZE)
+
+#define FSP_SEG_INODES_PER_PAGE(zip_size)		\
+	((((zip_size) ? (zip_size) : UNIV_PAGE_SIZE)	\
+	  - FSEG_ARR_OFFSET - 10) / FSEG_INODE_SIZE)
+				/* Number of segment inodes which fit on a
+				page of zip_size (0: UNIV_PAGE_SIZE) bytes */
+
+#define FSEG_MAGIC_N_VALUE	97937874
+
+#define	FSEG_FILLFACTOR		8	/* If this value is x, then if
+					the number of unused but reserved
+					pages in a segment is less than
+					reserved pages * 1/x, and there are
+					at least FSEG_FRAG_LIMIT used pages,
+					then we allow a new empty extent to
+					be added to the segment in
+					fseg_alloc_free_page. Otherwise, we
+					use unused pages of the segment. */
+
+#define FSEG_FRAG_LIMIT		FSEG_FRAG_ARR_N_SLOTS
+					/* If the segment has >= this many
+					used pages, it may be expanded by
+					allocating extents to the segment;
+					until that only individual fragment
+					pages are allocated from the space */
+
+#define	FSEG_FREE_LIST_LIMIT	40	/* If the reserved size of a segment
+					is at least this many extents, we
+					allow extents to be put to the free
+					list of the extent: at most
+					FSEG_FREE_LIST_MAX_LEN many */
+#define	FSEG_FREE_LIST_MAX_LEN	4
+/* @} */
+
+/* @defgroup Extent Descriptor Constants (moved from fsp0fsp.c) @{ */
+
+/*			EXTENT DESCRIPTOR
+			=================
+
+File extent descriptor data structure: contains bits to tell which pages in
+the extent are free and which contain old tuple version to clean. */
+
+/*-------------------------------------*/
+#define	XDES_ID			0	/* The identifier of the segment
+					to which this extent belongs */
+#define XDES_FLST_NODE		8	/* The list node data structure
+					for the descriptors */
+#define	XDES_STATE		(FLST_NODE_SIZE + 8)
+					/* contains state information
+					of the extent */
+#define	XDES_BITMAP		(FLST_NODE_SIZE + 12)
+					/* Descriptor bitmap of the pages
+					in the extent */
+/*-------------------------------------*/
+
+#define	XDES_BITS_PER_PAGE	2	/* How many bits are there per page */
+#define	XDES_FREE_BIT		0	/* Index of the bit which tells if
+					the page is free */
+#define	XDES_CLEAN_BIT		1	/* NOTE: currently not used!
+					Index of the bit which tells if
+					there are old versions of tuples
+					on the page */
+/* States of a descriptor */
+#define	XDES_FREE		1	/* extent is in free list of space */
+#define	XDES_FREE_FRAG		2	/* extent is in free fragment list of
+					space */
+#define	XDES_FULL_FRAG		3	/* extent is in full fragment list of
+					space */
+#define	XDES_FSEG		4	/* extent belongs to a segment */
+
+/** File extent data structure size in bytes. */
+#define	XDES_SIZE							\
+	(XDES_BITMAP							\
+	+ UT_BITS_IN_BYTES(FSP_EXTENT_SIZE * XDES_BITS_PER_PAGE))
+
+/** File extent data structure size in bytes for MAX page size. */
+#define	XDES_SIZE_MAX							\
+	(XDES_BITMAP							\
+	+ UT_BITS_IN_BYTES(FSP_EXTENT_SIZE_MAX * XDES_BITS_PER_PAGE))
+
+/** File extent data structure size in bytes for MIN page size. */
+#define	XDES_SIZE_MIN							\
+	(XDES_BITMAP							\
+	+ UT_BITS_IN_BYTES(FSP_EXTENT_SIZE_MIN * XDES_BITS_PER_PAGE))
+
+/** Offset of the descriptor array on a descriptor page */
+#define	XDES_ARR_OFFSET		(FSP_HEADER_OFFSET + FSP_HEADER_SIZE)
+
+/* @} */
+
+/**********************************************************************//**
+Initializes the file space system. */
+UNIV_INTERN
+void
+fsp_init(void);
+/*==========*/
+/**********************************************************************//**
+Gets the size of the system tablespace from the tablespace header.  If
+we do not have an auto-extending data file, this should be equal to
+the size of the data files.  If there is an auto-extending data file,
+this can be smaller.
+@return	size in pages */
+UNIV_INTERN
+ulint
+fsp_header_get_tablespace_size(void);
+/*================================*/
+/**********************************************************************//**
+Reads the file space size stored in the header page.
+@return	tablespace size stored in the space header */
+UNIV_INTERN
+ulint
+fsp_get_size_low(
+/*=============*/
+	page_t*	page);	/*!< in: header page (page 0 in the tablespace) */
+/**********************************************************************//**
+Reads the space id from the first page of a tablespace.
+@return	space id, ULINT_UNDEFINED if error */
+UNIV_INTERN
+ulint
+fsp_header_get_space_id(
+/*====================*/
+	const page_t*	page);	/*!< in: first page of a tablespace */
+/**********************************************************************//**
+Reads the space flags from the first page of a tablespace.
+@return	flags */
+UNIV_INTERN
+ulint
+fsp_header_get_flags(
+/*=================*/
+	const page_t*	page);	/*!< in: first page of a tablespace */
+/**********************************************************************//**
+Reads the compressed page size from the first page of a tablespace.
+@return	compressed page size in bytes, or 0 if uncompressed */
+UNIV_INTERN
+ulint
+fsp_header_get_zip_size(
+/*====================*/
+	const page_t*	page);	/*!< in: first page of a tablespace */
+/**********************************************************************//**
+Writes the space id and flags to a tablespace header.  The flags contain
+row type, physical/compressed page size, and logical/uncompressed page
+size of the tablespace. */
+UNIV_INTERN
+void
+fsp_header_init_fields(
+/*===================*/
+	page_t*	page,		/*!< in/out: first page in the space */
+	ulint	space_id,	/*!< in: space id */
+	ulint	flags);		/*!< in: tablespace flags (FSP_SPACE_FLAGS):
+				0, or table->flags if newer than COMPACT */
+/**********************************************************************//**
+Initializes the space header of a new created space and creates also the
+insert buffer tree root if space == 0. */
+UNIV_INTERN
+void
+fsp_header_init(
+/*============*/
+	ulint	space,		/*!< in: space id */
+	ulint	size,		/*!< in: current size in blocks */
+	mtr_t*	mtr);		/*!< in/out: mini-transaction */
+/**********************************************************************//**
+Increases the space size field of a space. */
+UNIV_INTERN
+void
+fsp_header_inc_size(
+/*================*/
+	ulint	space,		/*!< in: space id */
+	ulint	size_inc,	/*!< in: size increment in pages */
+	mtr_t*	mtr);		/*!< in/out: mini-transaction */
+/**********************************************************************//**
+Creates a new segment.
+@return the block where the segment header is placed, x-latched, NULL
+if could not create segment because of lack of space */
+UNIV_INTERN
+buf_block_t*
+fseg_create(
+/*========*/
+	ulint	space,	/*!< in: space id */
+	ulint	page,	/*!< in: page where the segment header is placed: if
+			this is != 0, the page must belong to another segment,
+			if this is 0, a new page will be allocated and it
+			will belong to the created segment */
+	ulint	byte_offset, /*!< in: byte offset of the created segment header
+			on the page */
+	mtr_t*	mtr);	/*!< in/out: mini-transaction */
+/**********************************************************************//**
+Creates a new segment.
+@return the block where the segment header is placed, x-latched, NULL
+if could not create segment because of lack of space */
+UNIV_INTERN
+buf_block_t*
+fseg_create_general(
+/*================*/
+	ulint	space,	/*!< in: space id */
+	ulint	page,	/*!< in: page where the segment header is placed: if
+			this is != 0, the page must belong to another segment,
+			if this is 0, a new page will be allocated and it
+			will belong to the created segment */
+	ulint	byte_offset, /*!< in: byte offset of the created segment header
+			on the page */
+	ibool	has_done_reservation, /*!< in: TRUE if the caller has already
+			done the reservation for the pages with
+			fsp_reserve_free_extents (at least 2 extents: one for
+			the inode and the other for the segment) then there is
+			no need to do the check for this individual
+			operation */
+	mtr_t*	mtr);	/*!< in/out: mini-transaction */
+/**********************************************************************//**
+Calculates the number of pages reserved by a segment, and how many pages are
+currently used.
+@return	number of reserved pages */
+UNIV_INTERN
+ulint
+fseg_n_reserved_pages(
+/*==================*/
+	fseg_header_t*	header,	/*!< in: segment header */
+	ulint*		used,	/*!< out: number of pages used (<= reserved) */
+	mtr_t*		mtr);	/*!< in/out: mini-transaction */
+/**********************************************************************//**
+Allocates a single free page from a segment. This function implements
+the intelligent allocation strategy which tries to minimize
+file space fragmentation.
+@param[in,out] seg_header	segment header
+@param[in] hint			hint of which page would be desirable
+@param[in] direction		if the new page is needed because
+				of an index page split, and records are
+				inserted there in order, into which
+				direction they go alphabetically: FSP_DOWN,
+				FSP_UP, FSP_NO_DIR
+@param[in,out] mtr		mini-transaction
+@return	X-latched block, or NULL if no page could be allocated */
+#define fseg_alloc_free_page(seg_header, hint, direction, mtr)		\
+	fseg_alloc_free_page_general(seg_header, hint, direction,	\
+				     FALSE, mtr, mtr)
+/**********************************************************************//**
+Allocates a single free page from a segment. This function implements
+the intelligent allocation strategy which tries to minimize file space
+fragmentation.
+@retval NULL if no page could be allocated
+@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
+(init_mtr == mtr, or the page was not previously freed in mtr)
+@retval block (not allocated or initialized) otherwise */
+UNIV_INTERN
+buf_block_t*
+fseg_alloc_free_page_general(
+/*=========================*/
+	fseg_header_t*	seg_header,/*!< in/out: segment header */
+	ulint		hint,	/*!< in: hint of which page would be
+				desirable */
+	byte		direction,/*!< in: if the new page is needed because
+				of an index page split, and records are
+				inserted there in order, into which
+				direction they go alphabetically: FSP_DOWN,
+				FSP_UP, FSP_NO_DIR */
+	ibool		has_done_reservation, /*!< in: TRUE if the caller has
+				already done the reservation for the page
+				with fsp_reserve_free_extents, then there
+				is no need to do the check for this individual
+				page */
+	mtr_t*		mtr,	/*!< in/out: mini-transaction */
+	mtr_t*		init_mtr)/*!< in/out: mtr or another mini-transaction
+				in which the page should be initialized.
+				If init_mtr!=mtr, but the page is already
+				latched in mtr, do not initialize the page. */
+	__attribute__((warn_unused_result, nonnull));
+/**********************************************************************//**
+Reserves free pages from a tablespace. All mini-transactions which may
+use several pages from the tablespace should call this function beforehand
+and reserve enough free extents so that they certainly will be able
+to do their operation, like a B-tree page split, fully. Reservations
+must be released with function fil_space_release_free_extents!
+
+The alloc_type below has the following meaning: FSP_NORMAL means an
+operation which will probably result in more space usage, like an
+insert in a B-tree; FSP_UNDO means allocation to undo logs: if we are
+deleting rows, then this allocation will in the long run result in
+less space usage (after a purge); FSP_CLEANING means allocation done
+in a physical record delete (like in a purge) or other cleaning operation
+which will result in less space usage in the long run. We prefer the latter
+two types of allocation: when space is scarce, FSP_NORMAL allocations
+will not succeed, but the latter two allocations will succeed, if possible.
+The purpose is to avoid dead end where the database is full but the
+user cannot free any space because these freeing operations temporarily
+reserve some space.
+
+Single-table tablespaces whose size is < 32 pages are a special case. In this
+function we would liberally reserve several 64 page extents for every page
+split or merge in a B-tree. But we do not want to waste disk space if the table
+only occupies < 32 pages. That is why we apply different rules in that special
+case, just ensuring that there are 3 free pages available.
+@return	TRUE if we were able to make the reservation */
+UNIV_INTERN
+ibool
+fsp_reserve_free_extents(
+/*=====================*/
+	ulint*	n_reserved,/*!< out: number of extents actually reserved; if we
+			return TRUE and the tablespace size is < 64 pages,
+			then this can be 0, otherwise it is n_ext */
+	ulint	space,	/*!< in: space id */
+	ulint	n_ext,	/*!< in: number of extents to reserve */
+	ulint	alloc_type,/*!< in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
+	mtr_t*	mtr);	/*!< in: mini-transaction */
+/**********************************************************************//**
+This function should be used to get information on how much new data we
+will still be able to insert into the database without running out of
+tablespace. Only free extents are taken into account and we also subtract
+the safety margin required by the above function fsp_reserve_free_extents.
+@return	available space in kB */
+UNIV_INTERN
+ullint
+fsp_get_available_space_in_free_extents(
+/*====================================*/
+	ulint	space);	/*!< in: space id */
+/**********************************************************************//**
+Frees a single page of a segment. */
+UNIV_INTERN
+void
+fseg_free_page(
+/*===========*/
+	fseg_header_t*	seg_header, /*!< in: segment header */
+	ulint		space,	/*!< in: space id */
+	ulint		page,	/*!< in: page offset */
+	mtr_t*		mtr);	/*!< in/out: mini-transaction */
+/**********************************************************************//**
+Checks if a single page of a segment is free.
+@return	true if free */
+UNIV_INTERN
+bool
+fseg_page_is_free(
+/*==============*/
+	fseg_header_t*	seg_header,	/*!< in: segment header */
+	ulint		space,		/*!< in: space id */
+	ulint		page)		/*!< in: page offset */
+	__attribute__((nonnull, warn_unused_result));
+/**********************************************************************//**
+Frees part of a segment. This function can be used to free a segment
+by repeatedly calling this function in different mini-transactions.
+Doing the freeing in a single mini-transaction might result in
+too big a mini-transaction.
+@return	TRUE if freeing completed */
+UNIV_INTERN
+ibool
+fseg_free_step(
+/*===========*/
+	fseg_header_t*	header,	/*!< in, own: segment header; NOTE: if the header
+				resides on the first page of the frag list
+				of the segment, this pointer becomes obsolete
+				after the last freeing step */
+	mtr_t*		mtr);	/*!< in/out: mini-transaction */
+/**********************************************************************//**
+Frees part of a segment. Differs from fseg_free_step because this function
+leaves the header page unfreed.
+@return	TRUE if freeing completed, except the header page */
+UNIV_INTERN
+ibool
+fseg_free_step_not_header(
+/*======================*/
+	fseg_header_t*	header,	/*!< in: segment header which must reside on
+				the first fragment page of the segment */
+	mtr_t*		mtr);	/*!< in/out: mini-transaction */
+/***********************************************************************//**
+Checks if a page address is an extent descriptor page address.
+@return	TRUE if a descriptor page */
+UNIV_INLINE
+ibool
+fsp_descr_page(
+/*===========*/
+	ulint	zip_size,/*!< in: compressed page size in bytes;
+			0 for uncompressed pages */
+	ulint	page_no);/*!< in: page number */
+/***********************************************************//**
+Parses a redo log record of a file page init.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+fsp_parse_init_file_page(
+/*=====================*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr, /*!< in: buffer end */
+	buf_block_t*	block);	/*!< in: block or NULL */
+/*******************************************************************//**
+Validates the file space system and its segments.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+fsp_validate(
+/*=========*/
+	ulint	space);	/*!< in: space id */
+/*******************************************************************//**
+Prints info of a file space. */
+UNIV_INTERN
+void
+fsp_print(
+/*======*/
+	ulint	space);	/*!< in: space id */
+#ifdef UNIV_DEBUG
+/*******************************************************************//**
+Validates a segment.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+fseg_validate(
+/*==========*/
+	fseg_header_t*	header, /*!< in: segment header */
+	mtr_t*		mtr);	/*!< in/out: mini-transaction */
+#endif /* UNIV_DEBUG */
+#ifdef UNIV_BTR_PRINT
+/*******************************************************************//**
+Writes info of a segment. */
+UNIV_INTERN
+void
+fseg_print(
+/*=======*/
+	fseg_header_t*	header, /*!< in: segment header */
+	mtr_t*		mtr);	/*!< in/out: mini-transaction */
+#endif /* UNIV_BTR_PRINT */
+
+/********************************************************************//**
+Validate the tablespace flags, which are stored in the
+tablespace header at offset FSP_SPACE_FLAGS.  They should be 0 for
+ROW_FORMAT=COMPACT and ROW_FORMAT=REDUNDANT. The newer row formats,
+COMPRESSED and DYNAMIC, use a file format > Antelope so they should
+have a file format number plus the DICT_TF_COMPACT bit set.
+@return	true if check ok */
+UNIV_INLINE
+bool
+fsp_flags_is_valid(
+/*===============*/
+	ulint	flags)		/*!< in: tablespace flags */
+	__attribute__((warn_unused_result, const));
+/********************************************************************//**
+Determine from the tablespace flags whether the tablespace is compressed.
+@return	TRUE if compressed, FALSE if not compressed */
+UNIV_INLINE
+ibool
+fsp_flags_is_compressed(
+/*====================*/
+	ulint	flags);	/*!< in: tablespace flags */
+
+/********************************************************************//**
+Calculates the descriptor index within a descriptor page.
+@return	descriptor index */
+UNIV_INLINE
+ulint
+xdes_calc_descriptor_index(
+/*=======================*/
+	ulint	zip_size,	/*!< in: compressed page size in bytes;
+				0 for uncompressed pages */
+	ulint	offset);	/*!< in: page offset */
+
+/**********************************************************************//**
+Gets a descriptor bit of a page.
+@return	TRUE if free */
+UNIV_INLINE
+ibool
+xdes_get_bit(
+/*=========*/
+	const xdes_t*	descr,	/*!< in: descriptor */
+	ulint		bit,	/*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
+	ulint		offset);/*!< in: page offset within extent:
+				0 ... FSP_EXTENT_SIZE - 1 */
+
+/********************************************************************//**
+Calculates the page where the descriptor of a page resides.
+@return	descriptor page offset */
+UNIV_INLINE
+ulint
+xdes_calc_descriptor_page(
+/*======================*/
+	ulint	zip_size,	/*!< in: compressed page size in bytes;
+				0 for uncompressed pages */
+	ulint	offset);	/*!< in: page offset */
+
+#endif /* !UNIV_INNOCHECKSUM */
+
+/********************************************************************//**
+Extract the zip size from tablespace flags.  A tablespace has only one
+physical page size whether that page is compressed or not.
+@return	compressed page size of the file-per-table tablespace in bytes,
+or zero if the table is not compressed.  */
+UNIV_INLINE
+ulint
+fsp_flags_get_zip_size(
+/*====================*/
+	ulint	flags);		/*!< in: tablespace flags */
+/********************************************************************//**
+Extract the page size from tablespace flags.
+@return	page size of the tablespace in bytes */
+UNIV_INLINE
+ulint
+fsp_flags_get_page_size(
+/*====================*/
+	ulint	flags);		/*!< in: tablespace flags */
+
+#ifndef UNIV_NONINL
+#include "fsp0fsp.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/fsp0fsp.ic b/storage/innobase/include/fsp0fsp.ic
new file mode 100644
index 00000000000..0d81e817cc9
--- /dev/null
+++ b/storage/innobase/include/fsp0fsp.ic
@@ -0,0 +1,314 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/fsp0fsp.ic
+File space management
+
+Created 12/18/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef UNIV_INNOCHECKSUM
+
+/***********************************************************************//**
+Tells whether a page number addresses an extent descriptor page.
+The page number is reduced modulo zip_size (compressed tablespace) or
+modulo UNIV_PAGE_SIZE (uncompressed tablespace) and compared against
+FSP_XDES_OFFSET.
+@return	TRUE if page_no is a descriptor page */
+UNIV_INLINE
+ibool
+fsp_descr_page(
+/*===========*/
+	ulint	zip_size,/*!< in: compressed page size in bytes;
+			0 for uncompressed pages */
+	ulint	page_no)/*!< in: page number */
+{
+	ulint	period;
+
+	ut_ad(ut_is_2pow(zip_size));
+
+	/* zip_size == 0 denotes an uncompressed tablespace. */
+	period = zip_size ? zip_size : UNIV_PAGE_SIZE;
+
+	/* For a power-of-two period the mask yields page_no % period. */
+	return((page_no & (period - 1)) == FSP_XDES_OFFSET);
+}
+
+/********************************************************************//**
+Validate the tablespace flags, which are stored in the
+tablespace header at offset FSP_SPACE_FLAGS.  They should be 0 for
+ROW_FORMAT=COMPACT and ROW_FORMAT=REDUNDANT. The newer row formats,
+COMPRESSED and DYNAMIC, use a file format > Antelope so they should
+have a file format number plus the DICT_TF_COMPACT bit set.
+This function only inspects the flags; it neither returns nor modifies them.
+@return	true if check ok */
+UNIV_INLINE
+bool
+fsp_flags_is_valid(
+/*===============*/
+	ulint	flags)		/*!< in: tablespace flags */
+{
+	ulint	post_antelope = FSP_FLAGS_GET_POST_ANTELOPE(flags);
+	ulint	zip_ssize = FSP_FLAGS_GET_ZIP_SSIZE(flags);
+	ulint	atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(flags);
+	ulint	page_ssize = FSP_FLAGS_GET_PAGE_SSIZE(flags);
+	ulint	unused = FSP_FLAGS_GET_UNUSED(flags);
+
+	/* Fault injection: when the debug keyword
+	"fsp_flags_is_valid_failure" is active, fail unconditionally. */
+	DBUG_EXECUTE_IF("fsp_flags_is_valid_failure", return(false););
+
+	/* fsp_flags is zero unless atomic_blobs is set. */
+	/* Make sure there are no bits that we do not know about.
+	The value 1 is also rejected outright. */
+	if (unused != 0 || flags == 1) {
+		return(false);
+	} else if (post_antelope) {
+		/* The Antelope row formats REDUNDANT and COMPACT did
+		not use tablespace flags, so this flag and the entire
+		4-byte field is zero for Antelope row formats.
+		Hence POST_ANTELOPE without atomic_blobs is invalid. */
+
+		if (!atomic_blobs) {
+			return(false);
+		}
+	}
+
+	if (!atomic_blobs) {
+		/* Without atomic_blobs, neither the POST_ANTELOPE flag
+		nor a compressed page size may be set. (Barracuda row
+		formats COMPRESSED and DYNAMIC build on the page
+		structure introduced for the COMPACT row format by
+		allowing long fields to be broken into prefix and
+		externally stored parts.) */
+
+		if (post_antelope || zip_ssize != 0) {
+			return(false);
+		}
+
+	} else if (!post_antelope || zip_ssize > PAGE_ZIP_SSIZE_MAX) {
+		/* atomic_blobs requires POST_ANTELOPE, and the compressed
+		page shift size must not exceed its maximum. */
+		return(false);
+	} else if (page_ssize > UNIV_PAGE_SSIZE_MAX) {
+
+		/* The page size field can be used for any row type, or it may
+		be zero for an original 16k page size.
+		Validate the page shift size is within allowed range.
+		Note that this check and the next one are part of the
+		else-if chain, so they are only reached when atomic_blobs
+		is set. */
+
+		return(false);
+
+	} else if (UNIV_PAGE_SIZE != UNIV_PAGE_SIZE_ORIG && !page_ssize) {
+		/* When the page size in use differs from the original
+		page size, it must be stored in the flags. */
+		return(false);
+	}
+
+#if UNIV_FORMAT_MAX != UNIV_FORMAT_B
+# error "UNIV_FORMAT_MAX != UNIV_FORMAT_B, Add more validations."
+#endif
+
+	/* The DATA_DIR field can be used for any row type so there is
+	nothing here to validate. */
+
+	return(true);
+}
+
+/********************************************************************//**
+Tells from the tablespace flags whether the tablespace is compressed,
+i.e. whether the ZIP_SSIZE field of the flags is nonzero.
+@return	TRUE if compressed, FALSE if not compressed */
+UNIV_INLINE
+ibool
+fsp_flags_is_compressed(
+/*====================*/
+	ulint	flags)	/*!< in: tablespace flags */
+{
+	ulint	zip_ssize = FSP_FLAGS_GET_ZIP_SSIZE(flags);
+
+	return(zip_ssize != 0);
+}
+
+#endif /* !UNIV_INNOCHECKSUM */
+
+/********************************************************************//**
+Derives the compressed page size from the ZIP_SSIZE field of the
+tablespace flags.
+@return	compressed page size of the file-per-table tablespace in bytes,
+or zero if the table is not compressed. */
+UNIV_INLINE
+ulint
+fsp_flags_get_zip_size(
+/*===================*/
+	ulint	flags)	/*!< in: tablespace flags */
+{
+	ulint	ssize = FSP_FLAGS_GET_ZIP_SSIZE(flags);
+	ulint	zip_size;
+
+	if (!ssize) {
+		/* No shift size stored: the tablespace is not compressed. */
+		return(0);
+	}
+
+	/* Convert from a 'log2 minus 9' to a page size in bytes. */
+	zip_size = ((UNIV_ZIP_SIZE_MIN >> 1) << ssize);
+
+	ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
+
+	return(zip_size);
+}
+
+/********************************************************************//**
+Derives the page size from the PAGE_SSIZE field of the tablespace flags.
+A zero field stands for the original page size, UNIV_PAGE_SIZE_ORIG.
+@return	page size of the tablespace in bytes */
+UNIV_INLINE
+ulint
+fsp_flags_get_page_size(
+/*====================*/
+	ulint	flags)	/*!< in: tablespace flags */
+{
+	ulint	ssize = FSP_FLAGS_GET_PAGE_SSIZE(flags);
+
+	if (UNIV_UNLIKELY(ssize)) {
+		/* Convert from a 'log2 minus 9' to a page size in bytes. */
+		ulint	page_size = ((UNIV_ZIP_SIZE_MIN >> 1) << ssize);
+
+		ut_ad(page_size <= UNIV_PAGE_SIZE_MAX);
+
+		return(page_size);
+	}
+
+	/* If the page size was not stored, then it is the original 16k. */
+	return(UNIV_PAGE_SIZE_ORIG);
+}
+
+#ifndef UNIV_INNOCHECKSUM
+
+/********************************************************************//**
+Stores a page size in the PAGE_SSIZE field of the tablespace flags.
+The original page size (UNIV_PAGE_SIZE_ORIG) is represented by a zero
+field, so in that case the flags are returned unchanged.
+@return	tablespace flags after page size is added */
+UNIV_INLINE
+ulint
+fsp_flags_set_page_size(
+/*====================*/
+	ulint	flags,		/*!< in: tablespace flags */
+	ulint	page_size)	/*!< in: page size in bytes */
+{
+	ulint	ssize = 0;
+	ulint	shift = UNIV_PAGE_SIZE_SHIFT_MAX;
+
+	/* Page size must lie within
+	[UNIV_PAGE_SIZE_MIN, UNIV_PAGE_SIZE_MAX], bounds included. */
+	ut_ad(page_size >= UNIV_PAGE_SIZE_MIN);
+	ut_ad(page_size <= UNIV_PAGE_SIZE_MAX);
+
+	if (page_size == UNIV_PAGE_SIZE_ORIG) {
+		ut_ad(0 == FSP_FLAGS_GET_PAGE_SSIZE(flags));
+		return(flags);
+	}
+
+	/* Walk the permitted shifts from the largest downwards until
+	the bit that is set in page_size is found. */
+	while (shift >= UNIV_PAGE_SIZE_SHIFT_MIN) {
+		ulint	mask = (1 << shift);
+
+		if (page_size & mask) {
+			/* page_size must be an exact power of two. */
+			ut_ad(!(page_size & ~mask));
+			ssize = shift - UNIV_ZIP_SIZE_SHIFT_MIN + 1;
+			break;
+		}
+
+		shift--;
+	}
+
+	ut_ad(ssize);
+	ut_ad(ssize <= UNIV_PAGE_SSIZE_MAX);
+
+	flags = FSP_FLAGS_SET_PAGE_SSIZE(flags, ssize);
+
+	ut_ad(fsp_flags_is_valid(flags));
+
+	return(flags);
+}
+
+/********************************************************************//**
+Computes the index of the extent descriptor of a page within its
+descriptor page: the page offset is reduced modulo zip_size (or modulo
+UNIV_PAGE_SIZE for uncompressed pages) and divided by FSP_EXTENT_SIZE.
+@return	descriptor index */
+UNIV_INLINE
+ulint
+xdes_calc_descriptor_index(
+/*=======================*/
+	ulint	zip_size,	/*!< in: compressed page size in bytes;
+				0 for uncompressed pages */
+	ulint	offset)		/*!< in: page offset */
+{
+	ulint	n_described;
+
+	ut_ad(ut_is_2pow(zip_size));
+
+	/* zip_size == 0 denotes an uncompressed tablespace. */
+	n_described = zip_size ? zip_size : UNIV_PAGE_SIZE;
+
+	return(ut_2pow_remainder(offset, n_described) / FSP_EXTENT_SIZE);
+}
+
+/**********************************************************************//**
+Reads one bit of a page from the bitmap of an extent descriptor.
+@return	TRUE if the requested bit is set */
+UNIV_INLINE
+ibool
+xdes_get_bit(
+/*=========*/
+	const xdes_t*	descr,	/*!< in: descriptor */
+	ulint		bit,	/*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
+	ulint		offset)	/*!< in: page offset within extent:
+				0 ... FSP_EXTENT_SIZE - 1 */
+{
+	ulint	bit_pos;
+	ulint	bitmap_byte;
+
+	ut_ad(offset < FSP_EXTENT_SIZE);
+	ut_ad(bit == XDES_FREE_BIT || bit == XDES_CLEAN_BIT);
+
+	/* Position of the requested bit, counted in bits from the
+	start of the bitmap; each page owns XDES_BITS_PER_PAGE bits. */
+	bit_pos = bit + XDES_BITS_PER_PAGE * offset;
+
+	/* Fetch the bitmap byte that holds the bit ... */
+	bitmap_byte = mach_read_ulint(descr + XDES_BITMAP + bit_pos / 8,
+				      MLOG_1BYTE);
+
+	/* ... and pick the bit out of that byte. */
+	return(ut_bit_get_nth(bitmap_byte, bit_pos % 8));
+}
+
+/********************************************************************//**
+Computes the page number of the descriptor page that holds the extent
+descriptor of a page: the page offset rounded down to a multiple of
+zip_size (or of UNIV_PAGE_SIZE for uncompressed pages).
+@return	descriptor page offset */
+UNIV_INLINE
+ulint
+xdes_calc_descriptor_page(
+/*======================*/
+	ulint	zip_size,	/*!< in: compressed page size in bytes;
+				0 for uncompressed pages */
+	ulint	offset)		/*!< in: page offset */
+{
+	/* Compile-time checks that the descriptor array fits within a
+	page, for the largest page size and the smallest compressed
+	page size. */
+#ifndef DOXYGEN /* Doxygen gets confused by these */
+# if UNIV_PAGE_SIZE_MAX <= XDES_ARR_OFFSET				\
+			   + (UNIV_PAGE_SIZE_MAX / FSP_EXTENT_SIZE_MAX)	\
+			   * XDES_SIZE_MAX
+#  error
+# endif
+# if UNIV_ZIP_SIZE_MIN <= XDES_ARR_OFFSET				\
+			  + (UNIV_ZIP_SIZE_MIN / FSP_EXTENT_SIZE_MIN)	\
+			  * XDES_SIZE_MIN
+#  error
+# endif
+#endif /* !DOXYGEN */
+
+	/* The same checks, in debug builds, for the page size in use. */
+	ut_ad(UNIV_PAGE_SIZE > XDES_ARR_OFFSET
+	      + (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE)
+	      * XDES_SIZE);
+	ut_ad(UNIV_ZIP_SIZE_MIN > XDES_ARR_OFFSET
+	      + (UNIV_ZIP_SIZE_MIN / FSP_EXTENT_SIZE)
+	      * XDES_SIZE);
+
+	ut_ad(ut_is_2pow(zip_size));
+
+	if (zip_size != 0) {
+		/* Compressed tablespace: round down to a multiple
+		of zip_size. */
+		ut_ad(zip_size > XDES_ARR_OFFSET
+		      + (zip_size / FSP_EXTENT_SIZE) * XDES_SIZE);
+
+		return(ut_2pow_round(offset, zip_size));
+	}
+
+	return(ut_2pow_round(offset, UNIV_PAGE_SIZE));
+}
+
+#endif /* !UNIV_INNOCHECKSUM */
diff --git a/storage/innobase/include/fsp0types.h b/storage/innobase/include/fsp0types.h
new file mode 100644
index 00000000000..94fd908ab0c
--- /dev/null
+++ b/storage/innobase/include/fsp0types.h
@@ -0,0 +1,116 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************
+@file include/fsp0types.h
+File space management types
+
+Created May 26, 2009 Vasil Dimov
+*******************************************************/
+
+#ifndef fsp0types_h
+#define fsp0types_h
+
+#include "univ.i"
+
+#include "fil0fil.h" /* for FIL_PAGE_DATA */
+
+/** @name Flags for inserting records in order
+If records are inserted in order, there are the following
+flags to tell this (their type is made byte for the compiler
+to warn if direction and hint parameters are switched in
+fseg_alloc_free_page) */
+/* @{ */
+#define	FSP_UP		((byte)111)	/*!< alphabetically upwards */
+#define	FSP_DOWN	((byte)112)	/*!< alphabetically downwards */
+#define	FSP_NO_DIR	((byte)113)	/*!< no order */
+/* @} */
+
+/** File space extent size (one megabyte) in pages */
+#define	FSP_EXTENT_SIZE		(1048576U / UNIV_PAGE_SIZE)
+
+/** File space extent size (one megabyte) in pages for MAX page size */
+#define	FSP_EXTENT_SIZE_MAX	(1048576 / UNIV_PAGE_SIZE_MAX)
+
+/** File space extent size (one megabyte) in pages for MIN page size */
+#define	FSP_EXTENT_SIZE_MIN	(1048576 / UNIV_PAGE_SIZE_MIN)
+
+/** On a page of any file segment, data may be put starting from this
+offset */
+#define FSEG_PAGE_DATA		FIL_PAGE_DATA
+
+/** @name File segment header
+The file segment header points to the inode describing the file segment. */
+/* @{ */
+/** Data type for file segment header */
+typedef	byte	fseg_header_t;
+
+#define FSEG_HDR_SPACE		0	/*!< space id of the inode */
+#define FSEG_HDR_PAGE_NO	4	/*!< page number of the inode */
+#define FSEG_HDR_OFFSET		8	/*!< byte offset of the inode */
+
+#define FSEG_HEADER_SIZE	10	/*!< Length of the file segment
+					header, in bytes */
+/* @} */
+
+/** Flags for fsp_reserve_free_extents @{ */
+#define FSP_NORMAL	1000000
+#define	FSP_UNDO	2000000
+#define FSP_CLEANING	3000000
+/* @} */
+
+/* Number of pages described in a single descriptor page: currently each page
+description takes less than 1 byte; a descriptor page is repeated every
+this many file pages */
+/* #define XDES_DESCRIBED_PER_PAGE		UNIV_PAGE_SIZE */
+/* This has been replaced with either UNIV_PAGE_SIZE or page_zip->size. */
+
+/** @name The space low address page map
+The pages at FSP_XDES_OFFSET and FSP_IBUF_BITMAP_OFFSET are repeated
+every XDES_DESCRIBED_PER_PAGE pages in every tablespace. */
+/* @{ */
+/*--------------------------------------*/
+#define FSP_XDES_OFFSET			0	/*!< extent descriptor */
+#define FSP_IBUF_BITMAP_OFFSET		1	/*!< insert buffer bitmap */
+				/* The ibuf bitmap pages are the ones whose
+				page number is the number above plus a
+				multiple of XDES_DESCRIBED_PER_PAGE */
+
+#define FSP_FIRST_INODE_PAGE_NO		2	/*!< in every tablespace */
+				/* The following pages exist
+				in the system tablespace (space 0). */
+#define FSP_IBUF_HEADER_PAGE_NO		3	/*!< insert buffer
+						header page, in
+						tablespace 0 */
+#define FSP_IBUF_TREE_ROOT_PAGE_NO	4	/*!< insert buffer
+						B-tree root page in
+						tablespace 0 */
+				/* The ibuf tree root page number in
+				tablespace 0; its fseg inode is on the page
+				number FSP_FIRST_INODE_PAGE_NO */
+#define FSP_TRX_SYS_PAGE_NO		5	/*!< transaction
+						system header, in
+						tablespace 0 */
+#define	FSP_FIRST_RSEG_PAGE_NO		6	/*!< first rollback segment
+						page, in tablespace 0 */
+#define FSP_DICT_HDR_PAGE_NO		7	/*!< data dictionary header
+						page, in tablespace 0 */
+/*--------------------------------------*/
+/* @} */
+
+#endif /* fsp0types_h */
diff --git a/storage/innobase/include/fts0ast.h b/storage/innobase/include/fts0ast.h
new file mode 100644
index 00000000000..b2380f78b39
--- /dev/null
+++ b/storage/innobase/include/fts0ast.h
@@ -0,0 +1,339 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fts0ast.h
+The FTS query parser (AST) abstract syntax tree routines
+
+Created 2007/03/16/03 Sunny Bains
+*******************************************************/
+
+#ifndef INNOBASE_FST0AST_H
+#define INNOBASE_FST0AST_H
+
+#include "mem0mem.h"
+#include "ha_prototypes.h"
+
+/** The type of an AST node; stored in fts_ast_node_t::type */
+enum fts_ast_type_t {
+	FTS_AST_OPER,				/*!< Operator */
+	FTS_AST_NUMB,				/*!< Number */
+	FTS_AST_TERM,				/*!< Term (or word) */
+	FTS_AST_TEXT,				/*!< Text string */
+	FTS_AST_LIST,				/*!< Expression list */
+	FTS_AST_SUBEXP_LIST			/*!< Sub-Expression list */
+};
+
+/** The FTS query operators that we support; stored in
+fts_ast_node_t::oper and passed to fts_ast_callback */
+enum fts_ast_oper_t {
+	FTS_NONE,				/*!< No operator */
+
+	FTS_IGNORE,				/*!< Ignore rows that contain
+						this word */
+
+	FTS_EXIST,				/*!< Include rows that contain
+						this word */
+
+	FTS_NEGATE,				/*!< Include rows that contain
+						this word but rank them
+						lower */
+
+	FTS_INCR_RATING,			/*!< Increase the rank for this
+						word */
+
+	FTS_DECR_RATING,			/*!< Decrease the rank for this
+						word */
+
+	FTS_DISTANCE,				/*!< Proximity distance */
+	FTS_IGNORE_SKIP,			/*!< Transient node operator;
+						signifies that this is a
+						FTS_IGNORE node, and is ignored
+						in the first pass of
+						fts_ast_visit() */
+	FTS_EXIST_SKIP				/*!< Transient node operator;
+						signifies that this is a
+						FTS_EXIST node, and is ignored
+						in the first pass of
+						fts_ast_visit() */
+};
+
+/* Data types used by the FTS parser */
+struct fts_lexer_t;
+struct fts_ast_node_t;
+struct fts_ast_state_t;
+struct fts_ast_string_t;
+
+typedef dberr_t (*fts_ast_callback)(fts_ast_oper_t, fts_ast_node_t*, void*);
+
+/********************************************************************
+Parse the string using the lexer setup within state.*/
+int
+fts_parse(
+/*======*/
+						/* out: 0 on OK, 1 on error */
+	fts_ast_state_t* state);		/*!< in: ast state instance.*/
+
+/********************************************************************
+Create an AST operator node */
+extern
+fts_ast_node_t*
+fts_ast_create_node_oper(
+/*=====================*/
+	void*		arg,			/*!< in: ast state */
+	fts_ast_oper_t	oper);			/*!< in: ast operator */
+/********************************************************************
+Create an AST term node, makes a copy of ptr */
+extern
+fts_ast_node_t*
+fts_ast_create_node_term(
+/*=====================*/
+	void*			arg,		/*!< in: ast state */
+	const fts_ast_string_t*	ptr);		/*!< in: term string */
+/********************************************************************
+Create an AST text node */
+extern
+fts_ast_node_t*
+fts_ast_create_node_text(
+/*=====================*/
+	void*			arg,		/*!< in: ast state */
+	const fts_ast_string_t*	ptr);		/*!< in: text string */
+/********************************************************************
+Create an AST expr list node */
+extern
+fts_ast_node_t*
+fts_ast_create_node_list(
+/*=====================*/
+	void*		arg,			/*!< in: ast state */
+	fts_ast_node_t*	expr);			/*!< in: ast expr */
+/********************************************************************
+Create a sub-expression list node. This function takes ownership of
+expr and is responsible for deleting it. */
+extern
+fts_ast_node_t*
+fts_ast_create_node_subexp_list(
+/*============================*/
+						/* out: new node */
+	void*		arg,			/*!< in: ast state instance */
+	fts_ast_node_t*	expr);			/*!< in: ast expr instance */
+/********************************************************************
+Set the wildcard attribute of a term.*/
+extern
+void
+fts_ast_term_set_wildcard(
+/*======================*/
+	fts_ast_node_t*	node);			/*!< in: term to change */
+/********************************************************************
+Set the proximity attribute of a text node. */
+
+void
+fts_ast_term_set_distance(
+/*======================*/
+	fts_ast_node_t*	node,			/*!< in/out: text node */
+	ulint		distance);		/*!< in: the text proximity
+						distance */
+/********************************************************************//**
+Free a fts_ast_node_t instance.
+@return next node to free */
+UNIV_INTERN
+fts_ast_node_t*
+fts_ast_free_node(
+/*==============*/
+	fts_ast_node_t*	node);			/*!< in: node to free */
+/********************************************************************
+Add a sub-expression to an AST*/
+extern
+fts_ast_node_t*
+fts_ast_add_node(
+/*=============*/
+	fts_ast_node_t*	list,			/*!< in: list node instance */
+	fts_ast_node_t*	node);			/*!< in: (sub) expr to add */
+/********************************************************************
+Print the AST node recursively.*/
+extern
+void
+fts_ast_node_print(
+/*===============*/
+	fts_ast_node_t*	node);			/*!< in: ast node to print */
+/********************************************************************
+For tracking node allocations, in case there is an error during parsing.*/
+extern
+void
+fts_ast_state_add_node(
+/*===================*/
+	fts_ast_state_t*state,			/*!< in: ast state instance */
+	fts_ast_node_t*	node);			/*!< in: node to add to state */
+/********************************************************************
+Free node and expr allocations.*/
+extern
+void
+fts_ast_state_free(
+/*===============*/
+	fts_ast_state_t*state);			/*!< in: state instance
+						to free */
+/******************************************************************//**
+Traverse the AST - in-order traversal.
+@return DB_SUCCESS if all went well */
+UNIV_INTERN
+dberr_t
+fts_ast_visit(
+/*==========*/
+	fts_ast_oper_t		oper,		/*!< in: FTS operator */
+	fts_ast_node_t*		node,		/*!< in: instance to traverse*/
+	fts_ast_callback	visitor,	/*!< in: callback */
+	void*			arg,		/*!< in: callback arg */
+	bool*			has_ignore)	/*!< out: whether we encountered
+						an operator and skipped
+						processing it; currently only
+						the FTS_IGNORE operator is
+						skipped */
+	__attribute__((nonnull, warn_unused_result));
+/*****************************************************************//**
+Process (nested) sub-expression, create a new result set to store the
+sub-expression result by processing nodes under current sub-expression
+list. Merge the sub-expression result with that of parent expression list.
+@return DB_SUCCESS if all went well */
+UNIV_INTERN
+dberr_t
+fts_ast_visit_sub_exp(
+/*==================*/
+	fts_ast_node_t*		node,		/*!< in: instance to traverse*/
+	fts_ast_callback	visitor,	/*!< in: callback */
+	void*			arg)		/*!< in: callback arg */
+	__attribute__((nonnull, warn_unused_result));
+/********************************************************************
+Create a lex instance.*/
+UNIV_INTERN
+fts_lexer_t*
+fts_lexer_create(
+/*=============*/
+	ibool		boolean_mode,		/*!< in: query type */
+	const byte*	query,			/*!< in: query string */
+	ulint		query_len)		/*!< in: query string len */
+	__attribute__((nonnull, malloc, warn_unused_result));
+/********************************************************************
+Free an fts_lexer_t instance.*/
+UNIV_INTERN
+void
+fts_lexer_free(
+/*===========*/
+	fts_lexer_t*	fts_lexer)		/*!< in: lexer instance to
+						free */
+	__attribute__((nonnull));
+
+/**
+Create an ast string object, with NUL-terminator, so the string
+has one more byte than len
+@param[in] str		pointer to string
+@param[in] len		length of the string
+@return ast string with NUL-terminator */
+UNIV_INTERN
+fts_ast_string_t*
+fts_ast_string_create(
+	const byte*	str,
+	ulint		len);
+
+/**
+Free an ast string instance
+@param[in,out] ast_str		string to free */
+UNIV_INTERN
+void
+fts_ast_string_free(
+	fts_ast_string_t*	ast_str);
+
+/**
+Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul
+@param[in] ast_str	string to translate
+@param[in] base		the base
+@return translated number */
+UNIV_INTERN
+ulint
+fts_ast_string_to_ul(
+	const fts_ast_string_t*	ast_str,
+	int			base);
+
+/**
+Print the ast string
+@param[in] ast_str	string to print */
+UNIV_INTERN
+void
+fts_ast_string_print(
+	const fts_ast_string_t*	ast_str);
+
+/** String of length len.
+We always store the string of length len with a terminating '\0',
+regardless of whether there is any 0x00 in the string itself */
+struct fts_ast_string_t {
+	/** Pointer to string. */
+	byte*		str;
+
+	/** Length of the string; the terminating '\0' is stored in
+	addition to these len bytes. */
+	ulint		len;
+};
+
+/** Query term type; embedded in fts_ast_node_t as the member `term' */
+struct fts_ast_term_t {
+	fts_ast_string_t*	ptr;		/*!< Pointer to term string.*/
+	ibool			wildcard;	/*!< TRUE if wild card set;
+						see fts_ast_term_set_wildcard() */
+};
+
+/** Query text type; embedded in fts_ast_node_t as the member `text' */
+struct fts_ast_text_t {
+	fts_ast_string_t*	ptr;		/*!< Pointer to text string.*/
+	ulint			distance;	/*!< > 0 if proximity distance
+						set; see
+						fts_ast_term_set_distance() */
+};
+
+/** The list of nodes in an expr list; also used by fts_ast_state_t
+for its list of allocated nodes */
+struct fts_ast_list_t {
+	fts_ast_node_t*	head;			/*!< Children list head */
+	fts_ast_node_t*	tail;			/*!< Children list tail */
+};
+
+/** FTS AST node to store the term, text, operator and sub-expressions.
+NOTE(review): text, term, oper and list are separate members rather than
+a union; presumably only the one matching `type' is meaningful for a
+given node - confirm against the implementation. */
+struct fts_ast_node_t {
+	fts_ast_type_t	type;			/*!< The type of node */
+	fts_ast_text_t	text;			/*!< Text node */
+	fts_ast_term_t	term;			/*!< Term node */
+	fts_ast_oper_t	oper;			/*!< Operator value */
+	fts_ast_list_t	list;			/*!< Expression list */
+	fts_ast_node_t*	next;			/*!< Link for expr list */
+	fts_ast_node_t*	next_alloc;		/*!< For tracking allocations */
+	bool		visited;		/*!< whether this node is
+						already processed */
+};
+
+/** To track state during parsing; passed to fts_parse() */
+struct fts_ast_state_t {
+	mem_heap_t*	heap;			/*!< Heap to use for alloc */
+	fts_ast_node_t*	root;			/*!< If all goes OK, then this
+						will point to the root.*/
+
+	fts_ast_list_t	list;			/*!< List of nodes allocated;
+						presumably linked through
+						fts_ast_node_t::next_alloc
+						(to be confirmed) */
+
+	fts_lexer_t*	lexer;			/*!< Lexer callback + arg */
+	CHARSET_INFO*	charset;		/*!< charset used for
+						tokenization */
+};
+
+#ifdef UNIV_DEBUG
+const char*
+fts_ast_oper_name_get(fts_ast_oper_t	oper);
+const char*
+fts_ast_node_type_get(fts_ast_type_t	type);
+#endif /* UNIV_DEBUG */
+
+#endif /* INNOBASE_FST0AST_H */
diff --git a/storage/innobase/include/fts0blex.h b/storage/innobase/include/fts0blex.h
new file mode 100644
index 00000000000..d0e4cae0678
--- /dev/null
+++ b/storage/innobase/include/fts0blex.h
@@ -0,0 +1,349 @@
+#ifndef fts0bHEADER_H
+#define fts0bHEADER_H 1
+#define fts0bIN_HEADER 1
+
+#line 6 "../include/fts0blex.h"
+
+#line 8 "../include/fts0blex.h"
+
+#define  YY_INT_ALIGNED short int
+
+/* A lexical scanner generated by flex */
+
+#define FLEX_SCANNER
+#define YY_FLEX_MAJOR_VERSION 2
+#define YY_FLEX_MINOR_VERSION 5
+#define YY_FLEX_SUBMINOR_VERSION 35
+#if YY_FLEX_SUBMINOR_VERSION > 0
+#define FLEX_BETA
+#endif
+
+/* First, we deal with  platform-specific or compiler-specific issues. */
+
+/* begin standard C headers. */
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+
+/* end standard C headers. */
+
+/* flex integer type definitions */
+
+#ifndef FLEXINT_H
+#define FLEXINT_H
+
+/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */
+
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+
+/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
+ * if you want the limit (max/min) macros for int types. 
+ */
+#ifndef __STDC_LIMIT_MACROS
+#define __STDC_LIMIT_MACROS 1
+#endif
+
+#include <inttypes.h>
+typedef int8_t flex_int8_t;
+typedef uint8_t flex_uint8_t;
+typedef int16_t flex_int16_t;
+typedef uint16_t flex_uint16_t;
+typedef int32_t flex_int32_t;
+typedef uint32_t flex_uint32_t;
+#else
+typedef signed char flex_int8_t;
+typedef short int flex_int16_t;
+typedef int flex_int32_t;
+typedef unsigned char flex_uint8_t; 
+typedef unsigned short int flex_uint16_t;
+typedef unsigned int flex_uint32_t;
+
+/* Limits of integral types. */
+#ifndef INT8_MIN
+#define INT8_MIN               (-128)
+#endif
+#ifndef INT16_MIN
+#define INT16_MIN              (-32767-1)
+#endif
+#ifndef INT32_MIN
+#define INT32_MIN              (-2147483647-1)
+#endif
+#ifndef INT8_MAX
+#define INT8_MAX               (127)
+#endif
+#ifndef INT16_MAX
+#define INT16_MAX              (32767)
+#endif
+#ifndef INT32_MAX
+#define INT32_MAX              (2147483647)
+#endif
+#ifndef UINT8_MAX
+#define UINT8_MAX              (255U)
+#endif
+#ifndef UINT16_MAX
+#define UINT16_MAX             (65535U)
+#endif
+#ifndef UINT32_MAX
+#define UINT32_MAX             (4294967295U)
+#endif
+
+#endif /* ! C99 */
+
+#endif /* ! FLEXINT_H */
+
+#ifdef __cplusplus
+
+/* The "const" storage-class-modifier is valid. */
+#define YY_USE_CONST
+
+#else	/* ! __cplusplus */
+
+/* C99 requires __STDC__ to be defined as 1. */
+#if defined (__STDC__)
+
+#define YY_USE_CONST
+
+#endif	/* defined (__STDC__) */
+#endif	/* ! __cplusplus */
+
+#ifdef YY_USE_CONST
+#define yyconst const
+#else
+#define yyconst
+#endif
+
+/* An opaque pointer. */
+#ifndef YY_TYPEDEF_YY_SCANNER_T
+#define YY_TYPEDEF_YY_SCANNER_T
+typedef void* yyscan_t;
+#endif
+
+/* For convenience, these vars (plus the bison vars far below)
+   are macros in the reentrant scanner. */
+#define yyin yyg->yyin_r
+#define yyout yyg->yyout_r
+#define yyextra yyg->yyextra_r
+#define yyleng yyg->yyleng_r
+#define yytext yyg->yytext_r
+#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno)
+#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column)
+#define yy_flex_debug yyg->yy_flex_debug_r
+
+/* Size of default input buffer. */
+#ifndef YY_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k.
+ * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case.
+ * Ditto for the __ia64__ case accordingly.
+ */
+#define YY_BUF_SIZE 32768
+#else
+#define YY_BUF_SIZE 16384
+#endif /* __ia64__ */
+#endif
+
+#ifndef YY_TYPEDEF_YY_BUFFER_STATE
+#define YY_TYPEDEF_YY_BUFFER_STATE
+typedef struct yy_buffer_state *YY_BUFFER_STATE;
+#endif
+
+#ifndef YY_TYPEDEF_YY_SIZE_T
+#define YY_TYPEDEF_YY_SIZE_T
+typedef size_t yy_size_t;
+#endif
+
+#ifndef YY_STRUCT_YY_BUFFER_STATE
+#define YY_STRUCT_YY_BUFFER_STATE
+struct yy_buffer_state
+	{
+	FILE *yy_input_file;		/* NOTE(review): presumably the stdio stream feeding this buffer - confirm */
+
+	char *yy_ch_buf;		/* input buffer */
+	char *yy_buf_pos;		/* current position in input buffer */
+
+	/* Size of input buffer in bytes, not including room for EOB
+	 * characters.
+	 */
+	yy_size_t yy_buf_size;
+
+	/* Number of characters read into yy_ch_buf, not including EOB
+	 * characters.
+	 */
+	int yy_n_chars;
+
+	/* Whether we "own" the buffer - i.e., we know we created it,
+	 * and can realloc() it to grow it, and should free() it to
+	 * delete it.
+	 */
+	int yy_is_our_buffer;
+
+	/* Whether this is an "interactive" input source; if so, and
+	 * if we're using stdio for input, then we want to use getc()
+	 * instead of fread(), to make sure we stop fetching input after
+	 * each newline.
+	 */
+	int yy_is_interactive;
+
+	/* Whether we're considered to be at the beginning of a line.
+	 * If so, '^' rules will be active on the next match, otherwise
+	 * not.
+	 */
+	int yy_at_bol;
+
+    int yy_bs_lineno; /**< The line count; what the yylineno macro expands to. */
+    int yy_bs_column; /**< The column count; what the yycolumn macro expands to. */
+    
+	/* Whether to try to fill the input buffer when we reach the
+	 * end of it.
+	 */
+	int yy_fill_buffer;
+
+	int yy_buffer_status;		/* NOTE(review): status values are not defined in this header - see the generated scanner source */
+
+	};
+#endif /* !YY_STRUCT_YY_BUFFER_STATE */
+
+void fts0brestart (FILE *input_file ,yyscan_t yyscanner );
+void fts0b_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
+YY_BUFFER_STATE fts0b_create_buffer (FILE *file,int size ,yyscan_t yyscanner );
+void fts0b_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
+void fts0b_flush_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
+void fts0bpush_buffer_state (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
+void fts0bpop_buffer_state (yyscan_t yyscanner );
+
+YY_BUFFER_STATE fts0b_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner );
+YY_BUFFER_STATE fts0b_scan_string (yyconst char *yy_str ,yyscan_t yyscanner );
+YY_BUFFER_STATE fts0b_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner );
+
+void *fts0balloc (yy_size_t ,yyscan_t yyscanner );
+void *fts0brealloc (void *,yy_size_t ,yyscan_t yyscanner );
+void fts0bfree (void * ,yyscan_t yyscanner );
+
+/* Begin user sect3 */
+
+#define fts0bwrap(n) 1
+#define YY_SKIP_YYWRAP
+
+#define yytext_ptr yytext_r
+
+#ifdef YY_HEADER_EXPORT_START_CONDITIONS
+#define INITIAL 0
+
+#endif
+
+#ifndef YY_NO_UNISTD_H
+/* Special case for "unistd.h", since it is non-ANSI. We include it way
+ * down here because we want the user's section 1 to have been scanned first.
+ * The user has a chance to override it with an option.
+ */
+#include <unistd.h>
+#endif
+
+#ifndef YY_EXTRA_TYPE
+#define YY_EXTRA_TYPE void *
+#endif
+
+int fts0blex_init (yyscan_t* scanner);
+
+int fts0blex_init_extra (YY_EXTRA_TYPE user_defined,yyscan_t* scanner);
+
+/* Accessor methods to globals.
+   These are made visible to non-reentrant scanners for convenience. */
+
+int fts0blex_destroy (yyscan_t yyscanner );
+
+int fts0bget_debug (yyscan_t yyscanner );
+
+void fts0bset_debug (int debug_flag ,yyscan_t yyscanner );
+
+YY_EXTRA_TYPE fts0bget_extra (yyscan_t yyscanner );
+
+void fts0bset_extra (YY_EXTRA_TYPE user_defined ,yyscan_t yyscanner );
+
+FILE *fts0bget_in (yyscan_t yyscanner );
+
+void fts0bset_in  (FILE * in_str ,yyscan_t yyscanner );
+
+FILE *fts0bget_out (yyscan_t yyscanner );
+
+void fts0bset_out  (FILE * out_str ,yyscan_t yyscanner );
+
+int fts0bget_leng (yyscan_t yyscanner );
+
+char *fts0bget_text (yyscan_t yyscanner );
+
+int fts0bget_lineno (yyscan_t yyscanner );
+
+void fts0bset_lineno (int line_number ,yyscan_t yyscanner );
+
+/* Macros after this point can all be overridden by user definitions in
+ * section 1.
+ */
+
+#ifndef YY_SKIP_YYWRAP
+#ifdef __cplusplus
+extern "C" int fts0bwrap (yyscan_t yyscanner );
+#else
+extern int fts0bwrap (yyscan_t yyscanner );
+#endif
+#endif
+
+#ifndef yytext_ptr
+static void yy_flex_strncpy (char *,yyconst char *,int ,yyscan_t yyscanner);
+#endif
+
+#ifdef YY_NEED_STRLEN
+static int yy_flex_strlen (yyconst char * ,yyscan_t yyscanner);
+#endif
+
+#ifndef YY_NO_INPUT
+
+#endif
+
+/* Amount of stuff to slurp up with each read. */
+#ifndef YY_READ_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k */
+#define YY_READ_BUF_SIZE 16384
+#else
+#define YY_READ_BUF_SIZE 8192
+#endif /* __ia64__ */
+#endif
+
+/* Number of entries by which start-condition stack grows. */
+#ifndef YY_START_STACK_INCR
+#define YY_START_STACK_INCR 25
+#endif
+
+/* Default declaration of generated scanner - a define so the user can
+ * easily add parameters.
+ */
+#ifndef YY_DECL
+#define YY_DECL_IS_OURS 1
+
+extern int fts0blex (yyscan_t yyscanner);
+
+#define YY_DECL int fts0blex (yyscan_t yyscanner)
+#endif /* !YY_DECL */
+
+/* yy_get_previous_state - get the state just before the EOB char was reached */
+
+#undef YY_NEW_FILE
+#undef YY_FLUSH_BUFFER
+#undef yy_set_bol
+#undef yy_new_buffer
+#undef yy_set_interactive
+#undef YY_DO_BEFORE_ACTION
+
+#ifdef YY_DECL_IS_OURS
+#undef YY_DECL_IS_OURS
+#undef YY_DECL
+#endif
+
+#line 73 "fts0blex.l"
+
+
+#line 348 "../include/fts0blex.h"
+#undef fts0bIN_HEADER
+#endif /* fts0bHEADER_H */
diff --git a/storage/innobase/include/fts0fts.h b/storage/innobase/include/fts0fts.h
new file mode 100644
index 00000000000..a2996ecacc8
--- /dev/null
+++ b/storage/innobase/include/fts0fts.h
@@ -0,0 +1,1039 @@
+/*****************************************************************************
+
+Copyright (c) 2011, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fts0fts.h
+Full text search header file
+
+Created 2011/09/02 Sunny Bains
+***********************************************************************/
+
+#ifndef fts0fts_h
+#define fts0fts_h
+
+#include "univ.i"
+
+#include "data0type.h"
+#include "data0types.h"
+#include "dict0types.h"
+#include "hash0hash.h"
+#include "mem0mem.h"
+#include "rem0types.h"
+#include "row0types.h"
+#include "trx0types.h"
+#include "ut0vec.h"
+#include "ut0rbt.h"
+#include "ut0wqueue.h"
+#include "que0types.h"
+#include "ft_global.h"
+
+/** "NULL" value of a document id. */
+#define FTS_NULL_DOC_ID			0
+
+/** FTS hidden column that is used to map to and from the row */
+#define FTS_DOC_ID_COL_NAME		"FTS_DOC_ID"
+
+/** The name of the index created by FTS */
+#define FTS_DOC_ID_INDEX_NAME		"FTS_DOC_ID_INDEX"
+
+#define FTS_DOC_ID_INDEX_NAME_LEN	16
+
+/** Doc ID is a 8 byte value */
+#define FTS_DOC_ID_LEN			8
+
+/** The number of fields to sort when we build an FT index with
+FIC. Three fields are sorted: (word, doc_id, position) */
+#define FTS_NUM_FIELDS_SORT		3
+
+/** Maximum number of rows in a table, smaller than which, we will
+optimize using a 4 byte Doc ID for FIC merge sort to reduce sort size */
+#define MAX_DOC_ID_OPT_VAL		1073741824
+
+/** Document id type. */
+typedef ib_uint64_t doc_id_t;
+
+/** doc_id_t printf format */
+#define FTS_DOC_ID_FORMAT	IB_ID_FMT
+
+/** Convert document id to the InnoDB (BIG ENDIAN) storage format. */
+#define fts_write_doc_id(d, s)	mach_write_to_8(d, s)
+
+/** Read a document id to internal format. */
+#define fts_read_doc_id(s)	mach_read_from_8(s)
+
+/** Bind the doc id to a variable */
+#define fts_bind_doc_id(i, n, v) pars_info_bind_int8_literal(i, n, v)
+
+/** Defines for FTS query mode, they have the same values as
+those defined in mysql file ft_global.h */
+#define FTS_NL		0
+#define FTS_BOOL	1
+#define FTS_SORTED	2
+#define FTS_EXPAND	4
+#define FTS_PROXIMITY	8
+#define FTS_PHRASE	16
+#define FTS_OPT_RANKING	32
+
+#define FTS_INDEX_TABLE_IND_NAME	"FTS_INDEX_TABLE_IND"
+
+/** Threshold where our optimize thread automatically kicks in */
+#define FTS_OPTIMIZE_THRESHOLD		10000000
+
+#define FTS_DOC_ID_MAX_STEP		10000
+/** Variable specifying the FTS parallel sort degree */
+extern ulong		fts_sort_pll_degree;
+
+/** Variable specifying the number of word to optimize for each optimize table
+call */
+extern ulong		fts_num_word_optimize;
+
+/** Variable specifying whether we do additional FTS diagnostic printout
+in the log */
+extern char		fts_enable_diag_print;
+
+/** FTS rank type, which will be between 0 .. 1 inclusive */
+typedef float 		fts_rank_t;
+
+/** Type of a row during a transaction. FTS_NOTHING means the row can be
+forgotten from the FTS system's POV, FTS_INVALID is an internal value used
+to mark invalid states.
+
+NOTE: Do not change the order or value of these, fts_trx_row_get_new_state
+depends on them being exactly as they are. */
+enum fts_row_state {
+	FTS_INSERT = 0,		/*!< value 0 */
+	FTS_MODIFY,		/*!< value 1 */
+	FTS_DELETE,		/*!< value 2 */
+	FTS_NOTHING,		/*!< value 3: row can be forgotten by FTS */
+	FTS_INVALID		/*!< value 4: internal, marks invalid states */
+};
+
+/** The FTS table types. */
+enum fts_table_type_t {
+	FTS_INDEX_TABLE,		/*!< FTS auxiliary table that is
+					specific to a particular FTS index
+					on a table */
+
+	FTS_COMMON_TABLE		/*!< FTS auxiliary table that is common
+					to all FTS indexes on a table */
+};
+
+struct fts_doc_t;
+struct fts_cache_t;
+struct fts_token_t;
+struct fts_doc_ids_t;
+struct fts_index_cache_t;
+
+
+/** Initialize "fts_table" for internal queries into FTS auxiliary tables;
+index_id and charset are left untouched. Invoke as a statement with ';'. */
+#define FTS_INIT_FTS_TABLE(fts_table, m_suffix, m_type, m_table)\
+do {								\
+	(fts_table)->suffix = (m_suffix);			\
+	(fts_table)->type = (m_type);				\
+	(fts_table)->table_id = (m_table)->id;			\
+	(fts_table)->parent = (m_table)->name;			\
+	(fts_table)->table = (m_table);				\
+} while (0)
+
+#define FTS_INIT_INDEX_TABLE(fts_table, m_suffix, m_type, m_index)\
+do {	/* fill fts_table from an FTS index; charset is left unset */\
+	(fts_table)->suffix = (m_suffix);			\
+	(fts_table)->type = (m_type);				\
+	(fts_table)->table_id = (m_index)->table->id;		\
+	(fts_table)->parent = (m_index)->table->name;		\
+	(fts_table)->table = (m_index)->table;			\
+	(fts_table)->index_id = (m_index)->id;			\
+} while (0)
+
+/** Information about changes in a single transaction affecting
+the FTS system. */
+struct fts_trx_t {
+	trx_t*		trx;		/*!< InnoDB transaction */
+
+	ib_vector_t*	savepoints;	/*!< Active savepoints, must have at
+					least one element, the implied
+					savepoint */
+	ib_vector_t*	last_stmt;	/*!< last-statement savepoint(s)? cf. fts_savepoint_laststmt_refresh(); TODO confirm */
+
+	mem_heap_t*	heap;		/*!< memory heap; its usage is not visible in this header */
+};
+
+/** Information required for transaction savepoint handling. */
+struct fts_savepoint_t {
+	char*		name;		/*!< First entry is always NULL, the
+					default instance. Otherwise the name
+					of the savepoint */
+
+	ib_rbt_t*	tables;		/*!< Modified FTS tables */
+};
+
+/** Information about changed rows in a transaction for a single table. */
+struct fts_trx_table_t {
+	dict_table_t*	table;		/*!< the table whose rows changed */
+
+	fts_trx_t*	fts_trx;	/*!< link to parent fts_trx_t */
+
+	ib_rbt_t*	rows;		/*!< rows changed; indexed by doc-id,
+					cells are fts_trx_row_t* */
+
+	fts_doc_ids_t*	added_doc_ids;	/*!< list of added doc ids (NULL until
+					the first addition) */
+
+					/*! graph for adding doc ids */
+	que_t*		docs_added_graph;
+};
+
+/** Information about one changed row in a transaction. */
+struct fts_trx_row_t {
+	doc_id_t	doc_id;		/*!< Id of the ins/upd/del document */
+
+	fts_row_state	state;		/*!< state of the row */
+
+	ib_vector_t*	fts_indexes;	/*!< The indexes that are affected */
+};
+
+/** List of document ids that were added during a transaction. This
+list is passed on to a background 'Add' thread and OPTIMIZE, so it
+needs its own memory heap. */
+struct fts_doc_ids_t {
+	ib_vector_t*	doc_ids;	/*!< document ids (each element is
+					of type doc_id_t). */
+
+	ib_alloc_t*	self_heap;	/*!< Allocator used to create an
+					instance of this type and the
+					doc_ids vector */
+};
+
+// FIXME: Get rid of this if possible.
+/** Since MySQL's character set support for Unicode is woefully inadequate
+(it supports basic operations like isalpha etc. only for 8-bit characters),
+we have to implement our own. We use UTF-16 without surrogate processing
+as our in-memory format. This typedef is a single such character. */
+typedef unsigned short ib_uc_t;
+
+/** A UTF-16 or UTF-8 string. */
+struct fts_string_t {
+	byte*		f_str;		/*!< string, not necessarily terminated
+					in any way */
+	ulint		f_len;		/*!< Length of the string in bytes */
+	ulint		f_n_char;	/*!< Number of characters */
+};
+
+/** Query ranked doc ids. */
+struct fts_ranking_t {
+	doc_id_t	doc_id;		/*!< Document id */
+
+	fts_rank_t	rank;		/*!< Rank is between 0 .. 1 */
+
+	byte*		words;		/*!< this contains the words
+					that were queried
+					and found in this document */
+	ulint		words_len;	/*!< words len */
+};
+
+/** Query result. */
+struct fts_result_t {
+	ib_rbt_node_t*	current;	/*!< Current element */
+
+	ib_rbt_t*	rankings_by_id;	/*!< RB tree of type fts_ranking_t
+					indexed by doc id */
+	ib_rbt_t*	rankings_by_rank;/*!< RB tree of type fts_ranking_t
+					indexed by rank */
+};
+
+/** This is used to generate the FTS auxiliary table name, we need the
+table id and the index id to generate the column specific FTS auxiliary
+table name. */
+struct fts_table_t {
+	const char*	parent;		/*!< Parent table name, this is
+					required only for the database
+					name */
+
+	fts_table_type_t
+			type;		/*!< The auxiliary table type */
+
+	table_id_t	table_id;	/*!< The table id */
+
+	index_id_t	index_id;	/*!< The index id */
+
+	const char*	suffix;		/*!< The suffix of the fts auxiliary
+					table name, can be NULL, not used
+					everywhere (yet) */
+	const dict_table_t*
+			table;		/*!< Parent table */
+	CHARSET_INFO*	charset;	/*!< charset info if it is for FTS
+					index auxiliary table */
+};
+
+enum	fts_status {	/* power-of-two values; presumably OR-ed into fts_t::fts_status - confirm */
+	BG_THREAD_STOP = 1,	 	/*!< TRUE if the FTS background thread
+					has finished reading the ADDED table,
+					meaning more items can be added to
+					the table. */
+
+	BG_THREAD_READY = 2,		/*!< TRUE if the FTS background thread
+					is ready */
+
+	ADD_THREAD_STARTED = 4,		/*!< TRUE if the FTS add thread
+					has started */
+
+	ADDED_TABLE_SYNCED = 8,		/*!< TRUE if the ADDED table record is
+					synced after crash recovery */
+
+	TABLE_DICT_LOCKED = 16		/*!< Set if the table has
+					dict_sys->mutex */
+};
+
+typedef	enum fts_status	fts_status_t;
+
+/** The state of the FTS sub system. */
+struct fts_t {
+					/*! mutex protecting bg_threads* and
+					fts_add_wq. */
+	ib_mutex_t		bg_threads_mutex;
+
+	ulint		bg_threads;	/*!< number of background threads
+					accessing this table */
+
+					/* TRUE if background threads running
+					should stop themselves. NOTE(review): orphaned text? */
+	ulint		fts_status;	/*!< Status bits regarding fts running
+					state; presumably enum fts_status */
+
+	ib_wqueue_t*	add_wq;		/*!< Work queue for scheduling jobs
+					for the FTS 'Add' thread, or NULL
+					if the thread has not yet been
+					created. Each work item is a
+					fts_trx_doc_ids_t* (NOTE(review): not declared here; fts_doc_ids_t*?). */
+
+	fts_cache_t*	cache;		/*!< FTS memory buffer for this table,
+					or NULL if the table has no FTS
+					index. */
+
+	ulint		doc_col;	/*!< FTS doc id hidden column number
+					in the CLUSTERED index. */
+
+	ib_vector_t*	indexes;	/*!< Vector of FTS indexes, this is
+					mainly for caching purposes. */
+	mem_heap_t*	fts_heap;	/*!< heap for fts_t allocation */
+};
+
+struct fts_stopword_t;
+
+/** status bits for fts_stopword_t status field. */
+#define STOPWORD_NOT_INIT               0x1
+#define STOPWORD_OFF                    0x2
+#define STOPWORD_FROM_DEFAULT           0x4
+#define STOPWORD_USER_TABLE             0x8
+
+extern const char*	fts_default_stopword[];
+
+/** Variable specifying the maximum FTS cache size for each table */
+extern ulong		fts_max_cache_size;
+
+/** Variable specifying the total memory allocated for FTS cache */
+extern ulong		fts_max_total_cache_size;
+
+/** Variable specifying the FTS result cache limit for each query */
+extern ulong		fts_result_cache_limit;
+
+/** Variable specifying the maximum FTS token size */
+extern ulong		fts_max_token_size;
+
+/** Variable specifying the minimum FTS token size */
+extern ulong		fts_min_token_size;
+
+/** Whether the total memory used for FTS cache is exhausted, and we will
+need a sync to free some memory */
+extern bool		fts_need_sync;
+
+/** Maximum possible Fulltext word length */
+#define FTS_MAX_WORD_LEN		HA_FT_MAXBYTELEN
+
+/** Maximum possible Fulltext word length (in characters) */
+#define FTS_MAX_WORD_LEN_IN_CHAR	HA_FT_MAXCHARLEN
+
+/** Variable specifying the table that has Fulltext index to display its
+content through information schema table */
+extern char*		fts_internal_tbl_name;
+
+#define	fts_que_graph_free(graph)			\
+do {	/* que_graph_free() is called with dict_sys->mutex held */	\
+	mutex_enter(&dict_sys->mutex);			\
+	que_graph_free(graph);				\
+	mutex_exit(&dict_sys->mutex);			\
+} while (0)
+
+/******************************************************************//**
+Create a FTS cache. */
+UNIV_INTERN
+fts_cache_t*
+fts_cache_create(
+/*=============*/
+	dict_table_t*	table);			/*!< table owns the FTS cache */
+
+/******************************************************************//**
+Create a FTS index cache.
+@return Index Cache */
+UNIV_INTERN
+fts_index_cache_t*
+fts_cache_index_cache_create(
+/*=========================*/
+	dict_table_t*	table,			/*!< in: table with FTS index */
+	dict_index_t*	index);			/*!< in: FTS index */
+
+/******************************************************************//**
+Get the next available document id. This function creates a new
+transaction to generate the document id.
+@return DB_SUCCESS if OK */
+UNIV_INTERN
+dberr_t
+fts_get_next_doc_id(
+/*================*/
+	const dict_table_t*	table,	/*!< in: table */
+	doc_id_t*		doc_id)	/*!< out: new document id */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Update the next and last Doc ID in the CONFIG table to be the input
+"doc_id" value (+ 1). We would do so after each FTS index build or
+table truncate */
+UNIV_INTERN
+void
+fts_update_next_doc_id(
+/*===================*/
+	trx_t*			trx,		/*!< in/out: transaction */
+	const dict_table_t*	table,		/*!< in: table */
+	const char*		table_name,	/*!< in: table name, or NULL */
+	doc_id_t		doc_id)		/*!< in: DOC ID to set */
+	__attribute__((nonnull(2)));
+
+/******************************************************************//**
+Create a new document id.
+@return DB_SUCCESS if all went well else error */
+UNIV_INTERN
+dberr_t
+fts_create_doc_id(
+/*==============*/
+	dict_table_t*	table,			/*!< in: row is of this
+						table. */
+	dtuple_t*	row,			/*!< in/out: add doc id
+						value to this row. This is the
+						current row that is being
+						inserted. */
+	mem_heap_t*	heap)			/*!< in: heap */
+	__attribute__((nonnull));
+/******************************************************************//**
+Create a new fts_doc_ids_t.
+@return new fts_doc_ids_t. */
+UNIV_INTERN
+fts_doc_ids_t*
+fts_doc_ids_create(void);
+/*=====================*/
+
+/******************************************************************//**
+Free a fts_doc_ids_t. */
+UNIV_INTERN
+void
+fts_doc_ids_free(
+/*=============*/
+	fts_doc_ids_t*	doc_ids);		/*!< in: doc_ids to free */
+
+/******************************************************************//**
+Notify the FTS system about an operation on an FTS-indexed table. */
+UNIV_INTERN
+void
+fts_trx_add_op(
+/*===========*/
+	trx_t*		trx,			/*!< in: InnoDB transaction */
+	dict_table_t*	table,			/*!< in: table */
+	doc_id_t	doc_id,			/*!< in: doc id */
+	fts_row_state	state,			/*!< in: state of the row */
+	ib_vector_t*	fts_indexes)		/*!< in: FTS indexes affected
+						(NULL=all) */
+	__attribute__((nonnull(1,2)));
+
+/******************************************************************//**
+Free an FTS trx. */
+UNIV_INTERN
+void
+fts_trx_free(
+/*=========*/
+	fts_trx_t*	fts_trx);		/*!< in, own: FTS trx */
+
+/******************************************************************//**
+Creates the common ancillary tables needed for supporting an FTS index
+on the given table. row_mysql_lock_data_dictionary must have been
+called before this.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_create_common_tables(
+/*=====================*/
+	trx_t*		trx,			/*!< in: transaction handle */
+	const dict_table_t*
+			table,			/*!< in: table with one FTS
+						index */
+	const char*	name,			/*!< in: table name */
+	bool		skip_doc_id_index)	/*!< in: Skip index on doc id */
+	__attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Wrapper function of fts_create_index_tables_low(), create auxiliary
+tables for an FTS index
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_create_index_tables(
+/*====================*/
+	trx_t*			trx,		/*!< in: transaction handle */
+	const dict_index_t*	index)		/*!< in: the FTS index
+						instance */
+	__attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Creates the column specific ancillary tables needed for supporting an
+FTS index on the given table. row_mysql_lock_data_dictionary must have
+been called before this.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_create_index_tables_low(
+/*========================*/
+	trx_t*		trx,			/*!< in: transaction handle */
+	const dict_index_t*
+			index,			/*!< in: the FTS index
+						instance */
+	const char*	table_name,		/*!< in: the table name */
+	table_id_t	table_id)		/*!< in: the table id */
+	__attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Add the FTS document id hidden column. */
+UNIV_INTERN
+void
+fts_add_doc_id_column(
+/*==================*/
+	dict_table_t*	table,	/*!< in/out: Table with FTS index */
+	mem_heap_t*	heap)	/*!< in: temporary memory heap, or NULL */
+	__attribute__((nonnull(1)));
+
+/*********************************************************************//**
+Drops the ancillary tables needed for supporting an FTS index on the
+given table. row_mysql_lock_data_dictionary must have been called before
+this.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_drop_tables(
+/*============*/
+	trx_t*		trx,			/*!< in: transaction */
+	dict_table_t*	table)			/*!< in: table has the FTS
+						index */
+	__attribute__((nonnull));
+/******************************************************************//**
+The given transaction is about to be committed; do whatever is necessary
+from the FTS system's POV.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_commit(
+/*=======*/
+	trx_t*		trx)			/*!< in: transaction */
+	__attribute__((nonnull, warn_unused_result));
+
+/*******************************************************************//**
+FTS Query entry point.
+@return DB_SUCCESS if successful otherwise error code */
+UNIV_INTERN
+dberr_t
+fts_query(
+/*======*/
+	trx_t*		trx,			/*!< in: transaction */
+	dict_index_t*	index,			/*!< in: FTS index to search */
+	uint		flags,			/*!< in: FTS search mode */
+	const byte*	query,			/*!< in: FTS query */
+	ulint		query_len,		/*!< in: FTS query string len
+						in bytes */
+	fts_result_t**	result)			/*!< out: query result, to be
+						freed by the caller.*/
+	__attribute__((nonnull, warn_unused_result));
+
+/******************************************************************//**
+Retrieve the FTS Relevance Ranking result for doc with doc_id
+@return the relevance ranking value. */
+UNIV_INTERN
+float
+fts_retrieve_ranking(
+/*=================*/
+	fts_result_t*	result,			/*!< in: FTS result structure */
+	doc_id_t	doc_id);		/*!< in: the interested document
+						doc_id */
+
+/******************************************************************//**
+FTS Query sort result, returned by fts_query() on fts_ranking_t::rank. */
+UNIV_INTERN
+void
+fts_query_sort_result_on_rank(
+/*==========================*/
+	fts_result_t*	result);		/*!< out: result instance
+						to sort.*/
+
+/******************************************************************//**
+FTS Query free result, returned by fts_query(). */
+UNIV_INTERN
+void
+fts_query_free_result(
+/*==================*/
+	fts_result_t*	result);		/*!< in: result instance
+						to free.*/
+
+/******************************************************************//**
+Extract the doc id from the FTS hidden column. */
+UNIV_INTERN
+doc_id_t
+fts_get_doc_id_from_row(
+/*====================*/
+	dict_table_t*	table,			/*!< in: table */
+	dtuple_t*	row);			/*!< in: row whose FTS doc id we
+						want to extract.*/
+
+/******************************************************************//**
+Extract the doc id from the FTS hidden column. */
+UNIV_INTERN
+doc_id_t
+fts_get_doc_id_from_rec(
+/*====================*/
+	dict_table_t*	table,			/*!< in: table */
+	const rec_t*	rec,			/*!< in: rec */
+	mem_heap_t*	heap);			/*!< in: heap */
+
+/******************************************************************//**
+Update the query graph with a new document id.
+@return Doc ID used */
+UNIV_INTERN
+doc_id_t
+fts_update_doc_id(
+/*==============*/
+	dict_table_t*	table,			/*!< in: table */
+	upd_field_t*	ufield,			/*!< out: update node */
+	doc_id_t*	next_doc_id);		/*!< out: buffer for writing */
+
+/******************************************************************//**
+FTS initialize. */
+UNIV_INTERN
+void
+fts_startup(void);
+/*==============*/
+
+/******************************************************************//**
+Signal FTS threads to initiate shutdown. */
+UNIV_INTERN
+void
+fts_start_shutdown(
+/*===============*/
+	dict_table_t*	table,			/*!< in: table with FTS
+						indexes */
+	fts_t*		fts);			/*!< in: fts instance to
+						shutdown */
+
+/******************************************************************//**
+Wait for FTS threads to shutdown. */
+UNIV_INTERN
+void
+fts_shutdown(
+/*=========*/
+	dict_table_t*	table,			/*!< in: table with FTS
+						indexes */
+	fts_t*		fts);			/*!< in: fts instance to
+						shutdown */
+
+/******************************************************************//**
+Create an instance of fts_t.
+@return instance of fts_t */
+UNIV_INTERN
+fts_t*
+fts_create(
+/*=======*/
+	dict_table_t*	table);			/*!< out: table with FTS
+						indexes */
+
+/**********************************************************************//**
+Free the FTS resources. */
+UNIV_INTERN
+void
+fts_free(
+/*=====*/
+	dict_table_t*   table);			/*!< in/out: table with
+						FTS indexes */
+
+/*********************************************************************//**
+Run OPTIMIZE on the given table.
+@return DB_SUCCESS if all OK */
+UNIV_INTERN
+dberr_t
+fts_optimize_table(
+/*===============*/
+	dict_table_t*	table)			/*!< in: table to optimize */
+	__attribute__((nonnull));
+
+/**********************************************************************//**
+Startup the optimize thread and create the work queue. */
+UNIV_INTERN
+void
+fts_optimize_init(void);
+/*====================*/
+
+/**********************************************************************//**
+Check whether the work queue is initialized.
+@return TRUE if the optimize queue is initialized. */
+UNIV_INTERN
+ibool
+fts_optimize_is_init(void);
+/*======================*/
+
+/****************************************************************//**
+Drops index ancillary tables for a FTS index
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_drop_index_tables(
+/*==================*/
+	trx_t*		trx,			/*!< in: transaction */
+	dict_index_t*	index)			/*!< in: Index to drop */
+	__attribute__((nonnull, warn_unused_result));
+
+/******************************************************************//**
+Remove the table from the OPTIMIZER's list. We do wait for
+acknowledgement from the consumer of the message. */
+UNIV_INTERN
+void
+fts_optimize_remove_table(
+/*======================*/
+	dict_table_t*	table);			/*!< in: table to remove */
+
+/**********************************************************************//**
+Signal the optimize thread to prepare for shutdown. */
+UNIV_INTERN
+void
+fts_optimize_start_shutdown(void);
+/*==============================*/
+
+/**********************************************************************//**
+Inform optimize to clean up. */
+UNIV_INTERN
+void
+fts_optimize_end(void);
+/*===================*/
+
+/**********************************************************************//**
+Take a FTS savepoint. */
+UNIV_INTERN
+void
+fts_savepoint_take(
+/*===============*/
+	trx_t*		trx,			/*!< in: transaction */
+	fts_trx_t*	fts_trx,		/*!< in: fts transaction */
+	const char*	name)			/*!< in: savepoint name */
+	__attribute__((nonnull));
+/**********************************************************************//**
+Refresh last statement savepoint. */
+UNIV_INTERN
+void
+fts_savepoint_laststmt_refresh(
+/*===========================*/
+	trx_t*		trx)			/*!< in: transaction */
+	__attribute__((nonnull));
+/**********************************************************************//**
+Release the savepoint data identified by name. */
+UNIV_INTERN
+void
+fts_savepoint_release(
+/*==================*/
+	trx_t*		trx,			/*!< in: transaction */
+	const char*	name);			/*!< in: savepoint name */
+
+/**********************************************************************//**
+Free the FTS cache. */
+UNIV_INTERN
+void
+fts_cache_destroy(
+/*==============*/
+	fts_cache_t*	cache);			/*!< in: cache*/
+
+/*********************************************************************//**
+Clear cache. */
+UNIV_INTERN
+void
+fts_cache_clear(
+/*============*/
+	fts_cache_t*	cache);			/*!< in: cache */
+
+/*********************************************************************//**
+Initialize things in cache. */
+UNIV_INTERN
+void
+fts_cache_init(
+/*===========*/
+	fts_cache_t*	cache);			/*!< in: cache */
+
+/*********************************************************************//**
+Roll back to and including the savepoint identified by name. */
+UNIV_INTERN
+void
+fts_savepoint_rollback(
+/*===================*/
+	trx_t*		trx,			/*!< in: transaction */
+	const char*	name);			/*!< in: savepoint name */
+
+/*********************************************************************//**
+Roll back the last statement savepoint of the transaction. */
+UNIV_INTERN
+void
+fts_savepoint_rollback_last_stmt(
+/*=============================*/
+	trx_t*		trx);			/*!< in: transaction */
+
+/***********************************************************************//**
+Drop all orphaned FTS auxiliary tables, those that don't have a parent
+table or FTS index defined on them. */
+UNIV_INTERN
+void
+fts_drop_orphaned_tables(void);
+/*==========================*/
+
+/******************************************************************//**
+Since we do a horizontal split on the index table, we need to drop
+all the split tables.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_drop_index_split_tables(
+/*========================*/
+	trx_t*		trx,			/*!< in: transaction */
+	dict_index_t*	index)			/*!< in: fts instance */
+	__attribute__((nonnull, warn_unused_result));
+
+/****************************************************************//**
+Run SYNC on the table, i.e., write out data from the cache to the
+FTS auxiliary INDEX table and clear the cache at the end. */
+UNIV_INTERN
+dberr_t
+fts_sync_table(
+/*===========*/
+	dict_table_t*	table)			/*!< in: table */
+	__attribute__((nonnull));
+
+/****************************************************************//**
+Free the query graph but check whether dict_sys->mutex is already
+held */
+UNIV_INTERN
+void
+fts_que_graph_free_check_lock(
+/*==========================*/
+	fts_table_t*		fts_table,	/*!< in: FTS table */
+	const fts_index_cache_t*index_cache,	/*!< in: FTS index cache */
+	que_t*			graph);		/*!< in: query graph */
+
+/****************************************************************//**
+Get the charset of an FTS index. */
+UNIV_INTERN
+CHARSET_INFO*
+fts_index_get_charset(
+/*==================*/
+	dict_index_t*		index);		/*!< in: FTS index */
+
+/*********************************************************************//**
+Get the initial Doc ID by consulting the CONFIG table
+@return initial Doc ID */
+UNIV_INTERN
+doc_id_t
+fts_init_doc_id(
+/*============*/
+	const dict_table_t*		table);	/*!< in: table */
+
+/******************************************************************//**
+Compare two character strings according to their charset. */
+extern
+int
+innobase_fts_text_cmp(
+/*==================*/
+	const void*	cs,			/*!< in: Character set */
+	const void*	p1,			/*!< in: key */
+	const void*	p2);			/*!< in: node */
+
+/******************************************************************//**
+Makes all characters in a string lower case. */
+extern
+size_t
+innobase_fts_casedn_str(
+/*====================*/
+        CHARSET_INFO*	cs,			/*!< in: Character set */
+	char*		src,			/*!< in: string to put in
+						lower case */
+	size_t		src_len,		/*!< in: input string length */
+	char*		dst,			/*!< out: buffer for result
+						string */
+	size_t		dst_len);		/*!< in: buffer size */
+
+
+/******************************************************************//**
+Compare two character strings according to their charset. */
+extern
+int
+innobase_fts_text_cmp_prefix(
+/*=========================*/
+	const void*	cs,			/*!< in: Character set */
+	const void*	p1,			/*!< in: key */
+	const void*	p2);			/*!< in: node */
+
+/*************************************************************//**
+Get the next token from the given string and store it in *token. */
+extern
+ulint
+innobase_mysql_fts_get_token(
+/*=========================*/
+	CHARSET_INFO*	charset,		/*!< in: Character set */
+	const byte*	start,			/*!< in: start of text */
+	const byte*	end,			/*!< in: one character past
+						end of text */
+	fts_string_t*	token,			/*!< out: token's text */
+	ulint*		offset);		/*!< out: offset to token,
+						measured as characters from
+						'start' */
+
+/*********************************************************************//**
+Fetch COUNT(*) from specified table.
+@return the number of rows in the table */
+UNIV_INTERN
+ulint
+fts_get_rows_count(
+/*===============*/
+	fts_table_t*	fts_table);		/*!< in: fts table to read */
+
+/*************************************************************//**
+Get maximum Doc ID in a table if index "FTS_DOC_ID_INDEX" exists
+@return max Doc ID or 0 if index "FTS_DOC_ID_INDEX" does not exist */
+UNIV_INTERN
+doc_id_t
+fts_get_max_doc_id(
+/*===============*/
+	dict_table_t*	table);			/*!< in: user table */
+
+/******************************************************************//**
+Check whether user supplied stopword table exists and is of
+the right format.
+@return the stopword column charset if qualifies */
+UNIV_INTERN
+CHARSET_INFO*
+fts_valid_stopword_table(
+/*=====================*/
+	const char*	stopword_table_name);	/*!< in: Stopword table
+						name */
+/****************************************************************//**
+This function loads specified stopword into FTS cache
+@return TRUE if success */
+UNIV_INTERN
+ibool
+fts_load_stopword(
+/*==============*/
+	const dict_table_t*
+			table,			/*!< in: Table with FTS */
+	trx_t*		trx,			/*!< in: Transaction */
+	const char*	global_stopword_table,	/*!< in: Global stopword table
+						name */
+	const char*	session_stopword_table,	/*!< in: Session stopword table
+						name */
+	ibool		stopword_is_on,		/*!< in: Whether stopword
+						option is turned on/off */
+	ibool		reload);		/*!< in: Whether it is during
+						reload of FTS table */
+
+/****************************************************************//**
+Create the vector of fts_get_doc_t instances.
+@return vector of fts_get_doc_t instances */
+UNIV_INTERN
+ib_vector_t*
+fts_get_docs_create(
+/*================*/
+	fts_cache_t*	cache);			/*!< in: fts cache */
+
+/****************************************************************//**
+Read the rows from the FTS index
+@return DB_SUCCESS if OK */
+UNIV_INTERN
+dberr_t
+fts_table_fetch_doc_ids(
+/*====================*/
+	trx_t*		trx,			/*!< in: transaction */
+	fts_table_t*	fts_table,		/*!< in: aux table */
+	fts_doc_ids_t*	doc_ids);		/*!< in: For collecting
+						doc ids */
+/****************************************************************//**
+This function brings the FTS index in sync when the FTS index is first
+used. Some documents may not have been synced to the auxiliary tables
+before an abnormal server shutdown; such documents need to be brought
+into the FTS cache before any further operations
+@return TRUE if all OK */
+UNIV_INTERN
+ibool
+fts_init_index(
+/*===========*/
+	dict_table_t*	table,			/*!< in: Table with FTS */
+	ibool		has_cache_lock);	/*!< in: Whether we already
+						have cache lock */
+/*******************************************************************//**
+Add a newly created index to the FTS cache */
+UNIV_INTERN
+void
+fts_add_index(
+/*==========*/
+	dict_index_t*	index,			/*!< FTS index to be added */
+	dict_table_t*	table);			/*!< table */
+
+/*******************************************************************//**
+Drop auxiliary tables related to an FTS index
+@return DB_SUCCESS or error number */
+UNIV_INTERN
+dberr_t
+fts_drop_index(
+/*===========*/
+	dict_table_t*	table,	/*!< in: Table where indexes are dropped */
+	dict_index_t*	index,	/*!< in: Index to be dropped */
+	trx_t*		trx)	/*!< in: Transaction for the drop */
+	__attribute__((nonnull));
+
+/****************************************************************//**
+Rename auxiliary tables for all fts index for a table
+@return DB_SUCCESS or error code */
+
+dberr_t
+fts_rename_aux_tables(
+/*==================*/
+	dict_table_t*	table,		/*!< in: user Table */
+	const char*	new_name,	/*!< in: new table name */
+	trx_t*		trx);		/*!< in: transaction */
+
+/*******************************************************************//**
+Check that the indexes in fts->indexes are also present in the index
+cache and in the table->indexes list
+@return TRUE if all indexes match */
+UNIV_INTERN
+ibool
+fts_check_cached_index(
+/*===================*/
+	dict_table_t*	table);  /*!< in: Table where indexes are dropped */
+#endif /*!< fts0fts.h */
+
diff --git a/storage/innobase/include/fts0opt.h b/storage/innobase/include/fts0opt.h
new file mode 100644
index 00000000000..92eaf8270d2
--- /dev/null
+++ b/storage/innobase/include/fts0opt.h
@@ -0,0 +1,37 @@
+/*****************************************************************************
+
+Copyright (c) 2001, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fts0opt.h
+Full Text Search optimize thread
+
+Created 2011-02-15 Jimmy Yang
+***********************************************************************/
+#ifndef INNODB_FTS0OPT_H
+#define INNODB_FTS0OPT_H
+
+/********************************************************************
+Callback function to fetch the rows in an FTS INDEX record. */
+UNIV_INTERN
+ibool
+fts_optimize_index_fetch_node(
+/*==========================*/
+                                        /* out: always returns non-NULL */
+        void*           row,		/* in: sel_node_t* */
+        void*           user_arg);	/* in: pointer to ib_vector_t */
+#endif
diff --git a/storage/innobase/include/fts0pars.h b/storage/innobase/include/fts0pars.h
new file mode 100644
index 00000000000..8108e811599
--- /dev/null
+++ b/storage/innobase/include/fts0pars.h
@@ -0,0 +1,72 @@
+/* A Bison parser, made by GNU Bison 2.5.  */
+
+/* Bison interface for Yacc-like parsers in C
+   
+      Copyright (C) 1984, 1989-1990, 2000-2011 Free Software Foundation, Inc.
+   
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+   
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+   
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* As a special exception, you may create a larger work that contains
+   part or all of the Bison parser skeleton and distribute that work
+   under terms of your choice, so long as that work isn't itself a
+   parser generator using the skeleton or a modified version thereof
+   as a parser skeleton.  Alternatively, if you modify or redistribute
+   the parser skeleton itself, you may (at your option) remove this
+   special exception, which will cause the skeleton and the resulting
+   Bison output files to be licensed under the GNU General Public
+   License without this special exception.
+   
+   This special exception was added by the Free Software Foundation in
+   version 2.2 of Bison.  */
+
+
+/* Tokens.  */
+#ifndef YYTOKENTYPE
+# define YYTOKENTYPE
+   /* Put the tokens into the symbol table, so that GDB and other debuggers
+      know about them.  */
+   enum yytokentype {
+     FTS_OPER = 258,
+     FTS_TEXT = 259,
+     FTS_TERM = 260,
+     FTS_NUMB = 261
+   };
+#endif
+
+
+
+#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
+typedef union YYSTYPE
+{
+
+/* Line 2068 of yacc.c  */
+#line 61 "fts0pars.y"
+
+	int			oper;
+	fts_ast_string_t*	token;
+	fts_ast_node_t*		node;
+
+
+
+/* Line 2068 of yacc.c  */
+#line 64 "fts0pars.hh"
+} YYSTYPE;
+# define YYSTYPE_IS_TRIVIAL 1
+# define yystype YYSTYPE /* obsolescent; will be withdrawn */
+# define YYSTYPE_IS_DECLARED 1
+#endif
+
+
+
+
diff --git a/storage/innobase/include/fts0priv.h b/storage/innobase/include/fts0priv.h
new file mode 100644
index 00000000000..b4d9e1d41ec
--- /dev/null
+++ b/storage/innobase/include/fts0priv.h
@@ -0,0 +1,653 @@
+/*****************************************************************************
+
+Copyright (c) 2011, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fts0priv.h
+Full text search internal header file
+
+Created 2011/09/02 Sunny Bains
+***********************************************************************/
+
+#ifndef INNOBASE_FTS0PRIV_H
+#define INNOBASE_FTS0PRIV_H
+
+#include "dict0dict.h"
+#include "pars0pars.h"
+#include "que0que.h"
+#include "que0types.h"
+#include "fts0types.h"
+
+/* The various states of the FTS sub system pertaining to a table with
+FTS indexes defined on it. */
+enum fts_table_state_enum {
+					/* RUNNING must be 0 since we insert
+					a hard coded '0' at create time
+					to the config table */
+
+	FTS_TABLE_STATE_RUNNING = 0,	/*!< Auxiliary tables created OK */
+
+	FTS_TABLE_STATE_OPTIMIZING,	/*!< This is a substate of RUNNING */
+
+	FTS_TABLE_STATE_DELETED		/*!< All aux tables to be dropped when
+					it's safe to do so */
+};
+
+typedef enum fts_table_state_enum fts_table_state_t;
+
+/** The default time to wait for the background thread (in microseconds). */
+#define FTS_MAX_BACKGROUND_THREAD_WAIT		10000
+
+/** Maximum number of iterations to wait before we complain */
+#define FTS_BACKGROUND_THREAD_WAIT_COUNT	1000
+
+/** The maximum length of a config table parameter name in bytes */
+#define FTS_MAX_CONFIG_NAME_LEN			64
+
+/** The maximum length of the config table's value column in bytes */
+#define FTS_MAX_CONFIG_VALUE_LEN		1024
+
+/** Approx. upper limit of ilist length in bytes. */
+#define FTS_ILIST_MAX_SIZE			(64 * 1024)
+
+/** FTS config table name parameters */
+
+/** The number of seconds after which an OPTIMIZE run will stop */
+#define FTS_OPTIMIZE_LIMIT_IN_SECS	"optimize_checkpoint_limit"
+
+/** The next doc id */
+#define FTS_SYNCED_DOC_ID		"synced_doc_id"
+
+/** The last word that was OPTIMIZED */
+#define FTS_LAST_OPTIMIZED_WORD		"last_optimized_word"
+
+/** Total number of documents that have been deleted. The next_doc_id
+minus this count gives us the total number of documents. */
+#define FTS_TOTAL_DELETED_COUNT		"deleted_doc_count"
+
+/** Total number of words parsed from all documents */
+#define FTS_TOTAL_WORD_COUNT		"total_word_count"
+
+/** Start of optimize of an FTS index */
+#define FTS_OPTIMIZE_START_TIME		"optimize_start_time"
+
+/** End of optimize for an FTS index */
+#define FTS_OPTIMIZE_END_TIME		"optimize_end_time"
+
+/** User specified stopword table name */
+#define	FTS_STOPWORD_TABLE_NAME		"stopword_table_name"
+
+/** Whether to use (turn on/off) stopword */
+#define	FTS_USE_STOPWORD		"use_stopword"
+
+/** State of the FTS system for this table. It can be one of
+ RUNNING, OPTIMIZING, DELETED. */
+#define FTS_TABLE_STATE			"table_state"
+
+/** The minimum length of an FTS auxiliary table name's id component
+e.g., For an auxiliary table name
+
+	FTS_<TABLE_ID>_SUFFIX
+
+This constant is for the minimum length required to store the <TABLE_ID>
+component.
+*/
+#define FTS_AUX_MIN_TABLE_ID_LENGTH	48
+
+/** Maximum length of an integer stored in the config table value column. */
+#define FTS_MAX_INT_LEN			32
+
+/******************************************************************//**
+Parse an SQL string. %s is replaced with the table's id.
+@return query graph */
+UNIV_INTERN
+que_t*
+fts_parse_sql(
+/*==========*/
+	fts_table_t*	fts_table,	/*!< in: FTS aux table */
+	pars_info_t*	info,		/*!< in: info struct, or NULL */
+	const char*	sql)		/*!< in: SQL string to evaluate */
+	__attribute__((nonnull(3), malloc, warn_unused_result));
+/******************************************************************//**
+Evaluate a parsed SQL statement
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_eval_sql(
+/*=========*/
+	trx_t*		trx,		/*!< in: transaction */
+	que_t*		graph)		/*!< in: Parsed statement */
+	__attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Construct the name of an ancillary FTS table for the given table.
+@return own: table name, must be freed with mem_free() */
+UNIV_INTERN
+char*
+fts_get_table_name(
+/*===============*/
+	const fts_table_t*
+			fts_table)	/*!< in: FTS aux table info */
+	__attribute__((nonnull, malloc, warn_unused_result));
+/******************************************************************//**
+Construct the column specification part of the SQL string for selecting the
+indexed FTS columns for the given table. Adds the necessary bound
+ids to the given 'info' and returns the SQL string. Examples:
+
+One indexed column named "text":
+
+ "$sel0",
+ info/ids: sel0 -> "text"
+
+Two indexed columns named "subject" and "content":
+
+ "$sel0, $sel1",
+ info/ids: sel0 -> "subject", sel1 -> "content",
+@return heap-allocated column specification string */
+UNIV_INTERN
+const char*
+fts_get_select_columns_str(
+/*=======================*/
+	dict_index_t*	index,		/*!< in: FTS index */
+	pars_info_t*	info,		/*!< in/out: parser info */
+	mem_heap_t*	heap)		/*!< in: memory heap */
+	__attribute__((nonnull, warn_unused_result));
+
+/** define for fts_doc_fetch_by_doc_id() "option" value, defines whether
+we want to get Doc whose ID is equal to or greater or smaller than supplied
+ID */
+#define	FTS_FETCH_DOC_BY_ID_EQUAL	1
+#define	FTS_FETCH_DOC_BY_ID_LARGE	2
+#define	FTS_FETCH_DOC_BY_ID_SMALL	3
+
+/*************************************************************//**
+Fetch document (= a single row's indexed text) with the given
+document id.
+@return DB_SUCCESS if fetch is successful, else error */
+UNIV_INTERN
+dberr_t
+fts_doc_fetch_by_doc_id(
+/*====================*/
+	fts_get_doc_t*	get_doc,	/*!< in: state */
+	doc_id_t	doc_id,		/*!< in: id of document to fetch */
+	dict_index_t*	index_to_use,	/*!< in: caller supplied FTS index,
+					or NULL */
+	ulint		option,         /*!< in: search option, one of the
+					FTS_FETCH_DOC_BY_ID_* values */
+	fts_sql_callback
+			callback,	/*!< in: callback to read
+					records */
+	void*		arg)		/*!< in: callback arg */
+	__attribute__((nonnull(6)));
+
+/*******************************************************************//**
+Callback function for fetch that stores the text of an FTS document,
+converting each column to UTF-16.
+@return always FALSE */
+UNIV_INTERN
+ibool
+fts_query_expansion_fetch_doc(
+/*==========================*/
+	void*		row,		/*!< in: sel_node_t* */
+	void*		user_arg)	/*!< in: fts_doc_t* */
+	__attribute__((nonnull));
+/********************************************************************
+Write out a single word's data as new entry/entries in the INDEX table.
+@return DB_SUCCESS if all OK. */
+UNIV_INTERN
+dberr_t
+fts_write_node(
+/*===========*/
+	trx_t*		trx,		/*!< in: transaction */
+	que_t**		graph,		/*!< in: query graph */
+	fts_table_t*	fts_table,	/*!< in: the FTS aux index */
+	fts_string_t*	word,		/*!< in: word in UTF-8 */
+	fts_node_t*	node)		/*!< in: node columns */
+	__attribute__((nonnull, warn_unused_result));
+/*******************************************************************//**
+Tokenize a document. */
+UNIV_INTERN
+void
+fts_tokenize_document(
+/*==================*/
+	fts_doc_t*	doc,		/*!< in/out: document to
+					tokenize */
+	fts_doc_t*	result)		/*!< out: if provided, save
+					result tokens here */
+	__attribute__((nonnull(1)));
+
+/*******************************************************************//**
+Continue to tokenize a document. */
+UNIV_INTERN
+void
+fts_tokenize_document_next(
+/*=======================*/
+	fts_doc_t*	doc,		/*!< in/out: document to
+					tokenize */
+	ulint		add_pos,	/*!< in: add this position to all
+					tokens from this tokenization */
+	fts_doc_t*	result)		/*!< out: if provided, save
+					result tokens here */
+	__attribute__((nonnull(1)));
+/******************************************************************//**
+Initialize a document. */
+UNIV_INTERN
+void
+fts_doc_init(
+/*=========*/
+	fts_doc_t*	doc)		/*!< in: doc to initialize */
+	__attribute__((nonnull));
+
+/******************************************************************//**
+Do a binary search for a doc id in the array
+@return +ve index if found -ve index where it should be
+        inserted if not found */
+UNIV_INTERN
+int
+fts_bsearch(
+/*========*/
+	fts_update_t*	array,		/*!< in: array to search */
+	int		lower,		/*!< in: lower bound of array*/
+	int		upper,		/*!< in: upper bound of array*/
+	doc_id_t	doc_id)		/*!< in: doc id to lookup */
+	__attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Free document. */
+UNIV_INTERN
+void
+fts_doc_free(
+/*=========*/
+	fts_doc_t*	doc)		/*!< in: document */
+	__attribute__((nonnull));
+/******************************************************************//**
+Free fts_word_t instance.*/
+UNIV_INTERN
+void
+fts_word_free(
+/*==========*/
+	fts_word_t*	word)		/*!< in: instance to free.*/
+	__attribute__((nonnull));
+/******************************************************************//**
+Read the rows from the FTS index
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_index_fetch_nodes(
+/*==================*/
+	trx_t*		trx,		/*!< in: transaction */
+	que_t**		graph,		/*!< in: prepared statement */
+	fts_table_t*	fts_table,	/*!< in: FTS aux table */
+	const fts_string_t*
+			word,		/*!< in: the word to fetch */
+	fts_fetch_t*	fetch)		/*!< in: fetch callback.*/
+	__attribute__((nonnull));
+/******************************************************************//**
+Initialize a fts_word_t instance.
+@return new instance */
+UNIV_INTERN
+fts_word_t*
+fts_word_init(
+/*==========*/
+	fts_word_t*	word,		/*!< in: word to initialize */
+	byte*		utf8,		/*!< in: UTF-8 string */
+	ulint		len)		/*!< in: length of string in bytes */
+	__attribute__((nonnull));
+/******************************************************************//**
+Compare two fts_trx_table_t instances, we actually compare the
+table id's here.
+@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
+UNIV_INLINE
+int
+fts_trx_table_cmp(
+/*==============*/
+	const void*	v1,		/*!< in: id1 */
+	const void*	v2)		/*!< in: id2 */
+	__attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Compare a table id with a trx_table_t table id.
+@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
+UNIV_INLINE
+int
+fts_trx_table_id_cmp(
+/*=================*/
+	const void*	p1,		/*!< in: id1 */
+	const void*	p2)		/*!< in: id2 */
+	__attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Commit a transaction.
+@return DB_SUCCESS if all OK */
+UNIV_INTERN
+dberr_t
+fts_sql_commit(
+/*===========*/
+	trx_t*		trx)		/*!< in: transaction */
+	__attribute__((nonnull));
+/******************************************************************//**
+Rollback a transaction.
+@return DB_SUCCESS if all OK */
+UNIV_INTERN
+dberr_t
+fts_sql_rollback(
+/*=============*/
+	trx_t*		trx)		/*!< in: transaction */
+	__attribute__((nonnull));
+/******************************************************************//**
+Parse an SQL string. %s is replaced with the table's id. Don't acquire
+the dict mutex
+@return query graph */
+UNIV_INTERN
+que_t*
+fts_parse_sql_no_dict_lock(
+/*=======================*/
+	fts_table_t*	fts_table,	/*!< in: table with FTS index */
+	pars_info_t*	info,		/*!< in: parser info */
+	const char*	sql)		/*!< in: SQL string to evaluate */
+	__attribute__((nonnull(3), malloc, warn_unused_result));
+/******************************************************************//**
+Get value from config table. The caller must ensure that enough
+space is allocated for value to hold the column contents
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_config_get_value(
+/*=================*/
+	trx_t*		trx,		/*!< in: transaction */
+	fts_table_t*	fts_table,	/*!< in: the indexed FTS table */
+	const char*	name,		/*!< in: get config value for
+					this parameter name */
+	fts_string_t*	value)		/*!< out: value read from
+					config table */
+	__attribute__((nonnull));
+/******************************************************************//**
+Get value specific to an FTS index from the config table. The caller
+must ensure that enough space is allocated for value to hold the
+column contents.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_config_get_index_value(
+/*=======================*/
+	trx_t*		trx,		/*!< transaction */
+	dict_index_t*	index,		/*!< in: index */
+	const char*	param,		/*!< in: get config value for
+					this parameter name */
+	fts_string_t*	value)		/*!< out: value read from
+					config table */
+	__attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Set the value in the config table for name.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_config_set_value(
+/*=================*/
+	trx_t*		trx,		/*!< in: transaction */
+	fts_table_t*	fts_table,	/*!< in: the indexed FTS table */
+	const char*	name,		/*!< in: set config value for
+					this parameter name */
+	const fts_string_t*
+			value)		/*!< in: value to update */
+	__attribute__((nonnull));
+/****************************************************************//**
+Set an ulint value in the config table.
+@return DB_SUCCESS if all OK else error code */
+UNIV_INTERN
+dberr_t
+fts_config_set_ulint(
+/*=================*/
+	trx_t*		trx,		/*!< in: transaction */
+	fts_table_t*	fts_table,	/*!< in: the indexed FTS table */
+	const char*	name,		/*!< in: param name */
+	ulint		int_value)	/*!< in: value */
+	__attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Set the value specific to an FTS index in the config table.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_config_set_index_value(
+/*=======================*/
+	trx_t*		trx,		/*!< in: transaction */
+	dict_index_t*	index,		/*!< in: index */
+	const char*	param,		/*!< in: set config value for
+					this parameter name */
+	fts_string_t*	value)		/*!< in: value to write to the
+					config table */
+	__attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Increment the value in the config table for column name.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_config_increment_value(
+/*=======================*/
+	trx_t*		trx,		/*!< transaction */
+	fts_table_t*	fts_table,	/*!< in: the indexed FTS table */
+	const char*	name,		/*!< in: increment config value
+					for this parameter name */
+	ulint		delta)		/*!< in: increment by this much */
+	__attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Increment the per index value in the config table for column name.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_config_increment_index_value(
+/*=============================*/
+	trx_t*		trx,		/*!< transaction */
+	dict_index_t*	index,		/*!< in: FTS index */
+	const char*	name,		/*!< in: increment config value
+					for this parameter name */
+	ulint		delta)		/*!< in: increment by this much */
+	__attribute__((nonnull));
+/******************************************************************//**
+Get an ulint value from the config table.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_config_get_index_ulint(
+/*=======================*/
+	trx_t*		trx,		/*!< in: transaction */
+	dict_index_t*	index,		/*!< in: FTS index */
+	const char*	name,		/*!< in: param name */
+	ulint*		int_value)	/*!< out: value */
+	__attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Set an ulint value in the config table.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_config_set_index_ulint(
+/*=======================*/
+	trx_t*		trx,		/*!< in: transaction */
+	dict_index_t*	index,		/*!< in: FTS index */
+	const char*	name,		/*!< in: param name */
+	ulint		int_value)	/*!< in: value */
+	__attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Get an ulint value from the config table.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_config_get_ulint(
+/*=================*/
+	trx_t*		trx,		/*!< in: transaction */
+	fts_table_t*	fts_table,	/*!< in: the indexed FTS table */
+	const char*	name,		/*!< in: param name */
+	ulint*		int_value)	/*!< out: value */
+	__attribute__((nonnull));
+/******************************************************************//**
+Search cache for word.
+@return the word node vector if found else NULL */
+UNIV_INTERN
+const ib_vector_t*
+fts_cache_find_word(
+/*================*/
+	const fts_index_cache_t*
+			index_cache,	/*!< in: cache to search */
+	const fts_string_t*
+			text)		/*!< in: word to search for */
+	__attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Check cache for deleted doc id.
+@return TRUE if deleted */
+UNIV_INTERN
+ibool
+fts_cache_is_deleted_doc_id(
+/*========================*/
+	const fts_cache_t*
+			cache,		/*!< in: cache to search */
+	doc_id_t	doc_id)		/*!< in: doc id to search for */
+	__attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Append deleted doc ids to vector and sort the vector. */
+UNIV_INTERN
+void
+fts_cache_append_deleted_doc_ids(
+/*=============================*/
+	const fts_cache_t*
+			cache,		/*!< in: cache to use */
+	ib_vector_t*	vector);	/*!< in: append to this vector */
+/******************************************************************//**
+Wait for the background thread to start. We poll to detect change
+of state, which is acceptable, since the wait should happen only
+once during startup.
+@return TRUE if the thread started else FALSE (i.e. timed out) */
+UNIV_INTERN
+ibool
+fts_wait_for_background_thread_to_start(
+/*====================================*/
+	dict_table_t*	table,		/*!< in: table to which the thread
+					is attached */
+	ulint		max_wait);	/*!< in: time in microseconds, if set
+					to 0 then it disables timeout
+					checking */
+#ifdef FTS_DOC_STATS_DEBUG
+/******************************************************************//**
+Get the total number of words in the FTS for a particular FTS index.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fts_get_total_word_count(
+/*=====================*/
+	trx_t*		trx,		/*!< in: transaction */
+	dict_index_t*	index,		/*!< in: for this index */
+	ulint*		total)		/*!< out: total words */
+	__attribute__((nonnull, warn_unused_result));
+#endif
+/******************************************************************//**
+Search the index specific cache for a particular FTS index.
+@return the index specific cache else NULL */
+UNIV_INTERN
+fts_index_cache_t*
+fts_find_index_cache(
+/*================*/
+	const fts_cache_t*
+			cache,		/*!< in: cache to search */
+	const dict_index_t*
+			index)		/*!< in: index to search for */
+	__attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Write the table id to the given buffer (including final NUL). Buffer must be
+at least FTS_AUX_MIN_TABLE_ID_LENGTH bytes long.
+@return	number of bytes written */
+UNIV_INLINE
+int
+fts_write_object_id(
+/*================*/
+	ib_id_t		id,		/*!< in: a table/index id */
+	char*		str,		/*!< in: buffer to write the id to */
+	bool		hex_format __attribute__((unused)))
+					/*!< in: true for fixed hex format,
+					false for old ambiguous format */
+	__attribute__((nonnull));
+/******************************************************************//**
+Read the table id from the string generated by fts_write_object_id().
+@return TRUE if parse successful */
+UNIV_INLINE
+ibool
+fts_read_object_id(
+/*===============*/
+	ib_id_t*	id,		/*!< out: a table id */
+	const char*	str)		/*!< in: buffer to read from */
+	__attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Get the table id.
+@return number of bytes written */
+UNIV_INTERN
+int
+fts_get_table_id(
+/*=============*/
+	const fts_table_t*
+			fts_table,	/*!< in: FTS Auxiliary table */
+	char*		table_id)	/*!< out: table id, must be at least
+					FTS_AUX_MIN_TABLE_ID_LENGTH bytes
+					long */
+	__attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Add the table to the OPTIMIZER's list. */
+UNIV_INTERN
+void
+fts_optimize_add_table(
+/*===================*/
+	dict_table_t*	table)		/*!< in: table to add */
+	__attribute__((nonnull));
+/******************************************************************//**
+Optimize a table. */
+UNIV_INTERN
+void
+fts_optimize_do_table(
+/*==================*/
+	dict_table_t*	table)		/*!< in: table to optimize */
+	__attribute__((nonnull));
+/******************************************************************//**
+Construct the prefix name of an FTS table.
+@return own: table name, must be freed with mem_free() */
+UNIV_INTERN
+char*
+fts_get_table_name_prefix(
+/*======================*/
+	const fts_table_t*
+			fts_table)	/*!< in: Auxiliary table type */
+	__attribute__((nonnull, malloc, warn_unused_result));
+/******************************************************************//**
+Add node positions. */
+UNIV_INTERN
+void
+fts_cache_node_add_positions(
+/*=========================*/
+	fts_cache_t*	cache,		/*!< in: cache */
+	fts_node_t*	node,		/*!< in: word node */
+	doc_id_t	doc_id,		/*!< in: doc id */
+	ib_vector_t*	positions)	/*!< in: fts_token_t::positions */
+	__attribute__((nonnull(2,4)));
+
+/******************************************************************//**
+Create the config table name for retrieving index specific value.
+@return index config parameter name */
+UNIV_INTERN
+char*
+fts_config_create_index_param_name(
+/*===============================*/
+	const char*		param,		/*!< in: base name of param */
+	const dict_index_t*	index)		/*!< in: index for config */
+	__attribute__((nonnull, malloc, warn_unused_result));
+
+#ifndef UNIV_NONINL
+#include "fts0priv.ic"
+#endif
+
+#endif /* INNOBASE_FTS0PRIV_H */
diff --git a/storage/innobase/include/fts0priv.ic b/storage/innobase/include/fts0priv.ic
new file mode 100644
index 00000000000..2d07c60f980
--- /dev/null
+++ b/storage/innobase/include/fts0priv.ic
@@ -0,0 +1,130 @@
+/*****************************************************************************
+
+Copyright (c) 2011, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fts0priv.ic
+Full text search internal header file
+
+Created 2011/11/12 Sunny Bains
+***********************************************************************/
+
+/******************************************************************//**
+Write the table id to the given buffer (including final NUL). Buffer must be
+at least FTS_AUX_MIN_TABLE_ID_LENGTH bytes long.
+@return	the sprintf() result: characters written, excluding the final NUL */
+UNIV_INLINE
+int
+fts_write_object_id(
+/*================*/
+	ib_id_t		id,		/* in: a table/index id */
+	char*		str,		/* out: buffer to write the id to */
+	bool		hex_format __attribute__((unused)))
+					/* in: true for fixed hex format,
+					false for old ambiguous format */
+{
+#ifdef _WIN32
+
+	DBUG_EXECUTE_IF("innodb_test_wrong_non_windows_fts_aux_table_name",
+			return(sprintf(str, UINT64PFx, id)););
+
+	/* Use this to construct old(5.6.14 and 5.7.3) windows
+	ambiguous aux table names */
+	DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name",
+			return(sprintf(str, "%016llu", id)););
+
+#else /* _WIN32 */
+
+	/* Use this to construct old(5.6.14 and 5.7.3) windows
+	ambiguous aux table names */
+	DBUG_EXECUTE_IF("innodb_test_wrong_windows_fts_aux_table_name",
+			return(sprintf(str, "%016" PRIu64, id)););
+
+	DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name",
+			return(sprintf(str, UINT64PFx, id)););
+
+#endif /* _WIN32 */
+
+	/* As above, but this is only for those tables failing to rename. */
+	if (!hex_format) {
+#ifdef _WIN32
+		// FIXME: Use ut_snprintf(), so does following one.
+		return(sprintf(str, "%016llu", id));
+#else /* _WIN32 */
+		/* Keep the space before PRIu64: C++11 literal-suffix rule. */
+		return(sprintf(str, "%016" PRIu64, id));
+#endif /* _WIN32 */
+	}
+
+	return(sprintf(str, UINT64PFx, id));
+}
+
+/******************************************************************//**
+Parse an id back out of a string produced by fts_write_object_id().
+@return	TRUE if exactly one id was parsed */
+UNIV_INLINE
+ibool
+fts_read_object_id(
+/*===============*/
+	ib_id_t*	id,		/* out: the parsed id */
+	const char*	str)		/* in: text to parse */
+{
+	/* Always scanned with UINT64PFx, whether or not the table has
+	HEX_NAME set; the caller interprets the id as hex or decimal. */
+	const int	n_fields = sscanf(str, UINT64PFx, id);
+	return(n_fields == 1);
+}
+
+/******************************************************************//**
+Order two fts_trx_table_t instances by the id of the table they refer to.
+@return -1 if id1 < id2, 0 if id1 == id2, 1 if id1 > id2 */
+UNIV_INLINE
+int
+fts_trx_table_cmp(
+/*==============*/
+	const void*	p1,			/*!< in: ptr to fts_trx_table_t* */
+	const void*	p2)			/*!< in: ptr to fts_trx_table_t* */
+{
+	const fts_trx_table_t*	lhs = *(const fts_trx_table_t**) p1;
+	const fts_trx_table_t*	rhs = *(const fts_trx_table_t**) p2;
+
+	if (lhs->table->id == rhs->table->id) {
+		return(0);
+	}
+
+	return((lhs->table->id > rhs->table->id) ? 1 : -1);
+}
+
+/******************************************************************//**
+Order a bare table id against the table id of a fts_trx_table_t.
+@return -1 if id1 < id2, 0 if id1 == id2, 1 if id1 > id2 */
+UNIV_INLINE
+int
+fts_trx_table_id_cmp(
+/*=================*/
+	const void*	p1,			/*!< in: ptr to a table id */
+	const void*	p2)			/*!< in: ptr to fts_trx_table_t* */
+{
+	const ullint		key = *(const ullint*) p1;
+	const fts_trx_table_t*	ftt = *(const fts_trx_table_t**) p2;
+
+	if (key == ftt->table->id) {
+		return(0);
+	}
+
+	return((key > ftt->table->id) ? 1 : -1);
+}
diff --git a/storage/innobase/include/fts0tlex.h b/storage/innobase/include/fts0tlex.h
new file mode 100644
index 00000000000..f91533803e8
--- /dev/null
+++ b/storage/innobase/include/fts0tlex.h
@@ -0,0 +1,349 @@
+#ifndef fts0tHEADER_H
+#define fts0tHEADER_H 1
+#define fts0tIN_HEADER 1
+
+#line 6 "../include/fts0tlex.h"
+
+#line 8 "../include/fts0tlex.h"
+
+#define  YY_INT_ALIGNED short int
+
+/* A lexical scanner generated by flex */
+
+#define FLEX_SCANNER
+#define YY_FLEX_MAJOR_VERSION 2
+#define YY_FLEX_MINOR_VERSION 5
+#define YY_FLEX_SUBMINOR_VERSION 35
+#if YY_FLEX_SUBMINOR_VERSION > 0
+#define FLEX_BETA
+#endif
+
+/* First, we deal with  platform-specific or compiler-specific issues. */
+
+/* begin standard C headers. */
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+
+/* end standard C headers. */
+
+/* flex integer type definitions */
+
+#ifndef FLEXINT_H
+#define FLEXINT_H
+
+/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */
+
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+
+/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
+ * if you want the limit (max/min) macros for int types. 
+ */
+#ifndef __STDC_LIMIT_MACROS
+#define __STDC_LIMIT_MACROS 1
+#endif
+
+#include <inttypes.h>
+typedef int8_t flex_int8_t;
+typedef uint8_t flex_uint8_t;
+typedef int16_t flex_int16_t;
+typedef uint16_t flex_uint16_t;
+typedef int32_t flex_int32_t;
+typedef uint32_t flex_uint32_t;
+#else
+typedef signed char flex_int8_t;
+typedef short int flex_int16_t;
+typedef int flex_int32_t;
+typedef unsigned char flex_uint8_t; 
+typedef unsigned short int flex_uint16_t;
+typedef unsigned int flex_uint32_t;
+
+/* Limits of integral types. */
+#ifndef INT8_MIN
+#define INT8_MIN               (-128)
+#endif
+#ifndef INT16_MIN
+#define INT16_MIN              (-32767-1)
+#endif
+#ifndef INT32_MIN
+#define INT32_MIN              (-2147483647-1)
+#endif
+#ifndef INT8_MAX
+#define INT8_MAX               (127)
+#endif
+#ifndef INT16_MAX
+#define INT16_MAX              (32767)
+#endif
+#ifndef INT32_MAX
+#define INT32_MAX              (2147483647)
+#endif
+#ifndef UINT8_MAX
+#define UINT8_MAX              (255U)
+#endif
+#ifndef UINT16_MAX
+#define UINT16_MAX             (65535U)
+#endif
+#ifndef UINT32_MAX
+#define UINT32_MAX             (4294967295U)
+#endif
+
+#endif /* ! C99 */
+
+#endif /* ! FLEXINT_H */
+
+#ifdef __cplusplus
+
+/* The "const" storage-class-modifier is valid. */
+#define YY_USE_CONST
+
+#else	/* ! __cplusplus */
+
+/* C99 requires __STDC__ to be defined as 1. */
+#if defined (__STDC__)
+
+#define YY_USE_CONST
+
+#endif	/* defined (__STDC__) */
+#endif	/* ! __cplusplus */
+
+#ifdef YY_USE_CONST
+#define yyconst const
+#else
+#define yyconst
+#endif
+
+/* An opaque pointer. */
+#ifndef YY_TYPEDEF_YY_SCANNER_T
+#define YY_TYPEDEF_YY_SCANNER_T
+typedef void* yyscan_t;
+#endif
+
+/* For convenience, these vars (plus the bison vars far below)
+   are macros in the reentrant scanner. */
+#define yyin yyg->yyin_r
+#define yyout yyg->yyout_r
+#define yyextra yyg->yyextra_r
+#define yyleng yyg->yyleng_r
+#define yytext yyg->yytext_r
+#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno)
+#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column)
+#define yy_flex_debug yyg->yy_flex_debug_r
+
+/* Size of default input buffer. */
+#ifndef YY_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k.
+ * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case.
+ * Ditto for the __ia64__ case accordingly.
+ */
+#define YY_BUF_SIZE 32768
+#else
+#define YY_BUF_SIZE 16384
+#endif /* __ia64__ */
+#endif
+
+#ifndef YY_TYPEDEF_YY_BUFFER_STATE
+#define YY_TYPEDEF_YY_BUFFER_STATE
+typedef struct yy_buffer_state *YY_BUFFER_STATE;
+#endif
+
+#ifndef YY_TYPEDEF_YY_SIZE_T
+#define YY_TYPEDEF_YY_SIZE_T
+typedef size_t yy_size_t;
+#endif
+
+#ifndef YY_STRUCT_YY_BUFFER_STATE
+#define YY_STRUCT_YY_BUFFER_STATE
+struct yy_buffer_state
+	{
+	FILE *yy_input_file;		/* stdio stream for this buffer; presumably the scanner's input - confirm in generated scanner */
+
+	char *yy_ch_buf;		/* input buffer */
+	char *yy_buf_pos;		/* current position in input buffer */
+
+	/* Size of input buffer in bytes, not including room for EOB
+	 * characters.
+	 */
+	yy_size_t yy_buf_size;
+
+	/* Number of characters read into yy_ch_buf, not including EOB
+	 * characters.
+	 */
+	int yy_n_chars;
+
+	/* Whether we "own" the buffer - i.e., we know we created it,
+	 * and can realloc() it to grow it, and should free() it to
+	 * delete it.
+	 */
+	int yy_is_our_buffer;
+
+	/* Whether this is an "interactive" input source; if so, and
+	 * if we're using stdio for input, then we want to use getc()
+	 * instead of fread(), to make sure we stop fetching input after
+	 * each newline.
+	 */
+	int yy_is_interactive;
+
+	/* Whether we're considered to be at the beginning of a line.
+	 * If so, '^' rules will be active on the next match, otherwise
+	 * not.
+	 */
+	int yy_at_bol;
+
+    int yy_bs_lineno; /**< The line count. */
+    int yy_bs_column; /**< The column count. */
+    
+	/* Whether to try to fill the input buffer when we reach the
+	 * end of it.
+	 */
+	int yy_fill_buffer;
+
+	int yy_buffer_status;		/* buffer state code; its values are not defined in this header */
+
+	};
+#endif /* !YY_STRUCT_YY_BUFFER_STATE */
+
+void fts0trestart (FILE *input_file ,yyscan_t yyscanner );
+void fts0t_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
+YY_BUFFER_STATE fts0t_create_buffer (FILE *file,int size ,yyscan_t yyscanner );
+void fts0t_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
+void fts0t_flush_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
+void fts0tpush_buffer_state (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
+void fts0tpop_buffer_state (yyscan_t yyscanner );
+
+YY_BUFFER_STATE fts0t_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner );
+YY_BUFFER_STATE fts0t_scan_string (yyconst char *yy_str ,yyscan_t yyscanner );
+YY_BUFFER_STATE fts0t_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner );
+
+void *fts0talloc (yy_size_t ,yyscan_t yyscanner );
+void *fts0trealloc (void *,yy_size_t ,yyscan_t yyscanner );
+void fts0tfree (void * ,yyscan_t yyscanner );
+
+/* Begin user sect3 */
+
+#define fts0twrap(n) 1
+#define YY_SKIP_YYWRAP
+
+#define yytext_ptr yytext_r
+
+#ifdef YY_HEADER_EXPORT_START_CONDITIONS
+#define INITIAL 0
+
+#endif
+
+#ifndef YY_NO_UNISTD_H
+/* Special case for "unistd.h", since it is non-ANSI. We include it way
+ * down here because we want the user's section 1 to have been scanned first.
+ * The user has a chance to override it with an option.
+ */
+#include <unistd.h>
+#endif
+
+#ifndef YY_EXTRA_TYPE
+#define YY_EXTRA_TYPE void *
+#endif
+
+int fts0tlex_init (yyscan_t* scanner);
+
+int fts0tlex_init_extra (YY_EXTRA_TYPE user_defined,yyscan_t* scanner);
+
+/* Accessor methods to globals.
+   These are made visible to non-reentrant scanners for convenience. */
+
+int fts0tlex_destroy (yyscan_t yyscanner );
+
+int fts0tget_debug (yyscan_t yyscanner );
+
+void fts0tset_debug (int debug_flag ,yyscan_t yyscanner );
+
+YY_EXTRA_TYPE fts0tget_extra (yyscan_t yyscanner );
+
+void fts0tset_extra (YY_EXTRA_TYPE user_defined ,yyscan_t yyscanner );
+
+FILE *fts0tget_in (yyscan_t yyscanner );
+
+void fts0tset_in  (FILE * in_str ,yyscan_t yyscanner );
+
+FILE *fts0tget_out (yyscan_t yyscanner );
+
+void fts0tset_out  (FILE * out_str ,yyscan_t yyscanner );
+
+int fts0tget_leng (yyscan_t yyscanner );
+
+char *fts0tget_text (yyscan_t yyscanner );
+
+int fts0tget_lineno (yyscan_t yyscanner );
+
+void fts0tset_lineno (int line_number ,yyscan_t yyscanner );
+
+/* Macros after this point can all be overridden by user definitions in
+ * section 1.
+ */
+
+#ifndef YY_SKIP_YYWRAP
+#ifdef __cplusplus
+extern "C" int fts0twrap (yyscan_t yyscanner );
+#else
+extern int fts0twrap (yyscan_t yyscanner );
+#endif
+#endif
+
+#ifndef yytext_ptr
+static void yy_flex_strncpy (char *,yyconst char *,int ,yyscan_t yyscanner);
+#endif
+
+#ifdef YY_NEED_STRLEN
+static int yy_flex_strlen (yyconst char * ,yyscan_t yyscanner);
+#endif
+
+#ifndef YY_NO_INPUT
+
+#endif
+
+/* Amount of stuff to slurp up with each read. */
+#ifndef YY_READ_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k */
+#define YY_READ_BUF_SIZE 16384
+#else
+#define YY_READ_BUF_SIZE 8192
+#endif /* __ia64__ */
+#endif
+
+/* Number of entries by which start-condition stack grows. */
+#ifndef YY_START_STACK_INCR
+#define YY_START_STACK_INCR 25
+#endif
+
+/* Default declaration of generated scanner - a define so the user can
+ * easily add parameters.
+ */
+#ifndef YY_DECL
+#define YY_DECL_IS_OURS 1
+
+extern int fts0tlex (yyscan_t yyscanner);
+
+#define YY_DECL int fts0tlex (yyscan_t yyscanner)
+#endif /* !YY_DECL */
+
+/* yy_get_previous_state - get the state just before the EOB char was reached */
+
+#undef YY_NEW_FILE
+#undef YY_FLUSH_BUFFER
+#undef yy_set_bol
+#undef yy_new_buffer
+#undef yy_set_interactive
+#undef YY_DO_BEFORE_ACTION
+
+#ifdef YY_DECL_IS_OURS
+#undef YY_DECL_IS_OURS
+#undef YY_DECL
+#endif
+
+#line 68 "fts0tlex.l"
+
+
+#line 348 "../include/fts0tlex.h"
+#undef fts0tIN_HEADER
+#endif /* fts0tHEADER_H */
diff --git a/storage/innobase/include/fts0types.h b/storage/innobase/include/fts0types.h
new file mode 100644
index 00000000000..64677428331
--- /dev/null
+++ b/storage/innobase/include/fts0types.h
@@ -0,0 +1,474 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fts0types.h
+Full text search types file
+
+Created 2007-03-27 Sunny Bains
+*******************************************************/
+
+#ifndef INNOBASE_FTS0TYPES_H
+#define INNOBASE_FTS0TYPES_H
+
+#include "que0types.h"
+#include "ut0byte.h"
+#include "fut0fut.h"
+#include "ut0rbt.h"
+#include "fts0fts.h"
+
+/** Types used within FTS. */
+struct fts_que_t;
+struct fts_node_t;
+struct fts_utf8_str_t;
+
+/** Callbacks used within FTS. */
+typedef pars_user_func_cb_t fts_sql_callback;
+typedef void (*fts_filter)(void*, fts_node_t*, void*, ulint len);
+
+/** Statistics relevant to a particular document, used during retrieval. */
+struct fts_doc_stats_t {
+	doc_id_t	doc_id;		/*!< Document id */
+	ulint		word_count;	/*!< Total words in the document */
+};
+
+/** Its main purpose is to store the SQL prepared statements that
+are required to retrieve a document from the database. */
+struct fts_get_doc_t {
+	fts_index_cache_t*
+			index_cache;	/*!< The index cache instance */
+
+	que_t*		get_document_graph;
+					/*!< Parsed sql statement */
+	fts_cache_t*	cache;		/*!< The parent cache */
+};
+
+/** Since we can have multiple FTS indexes on a table, we keep a
+per index cache of words etc. */
+struct fts_index_cache_t {
+	dict_index_t*	index;		/*!< The FTS index instance */
+
+	ib_rbt_t*	words;		/*!< Nodes; indexed by fts_string_t*,
+					cells are fts_tokenizer_word_t*.*/
+
+	ib_vector_t*	doc_stats;	/*!< Array of the fts_doc_stats_t
+					contained in the memory buffer.
+					Must be in sorted order (ascending).
+					The  ideal choice is an rb tree but
+					the rb tree imposes a space overhead
+					that we can do without */
+
+	que_t**		ins_graph;	/*!< Insert query graphs */
+
+	que_t**		sel_graph;	/*!< Select query graphs */
+	CHARSET_INFO*	charset;	/*!< charset */
+};
+
+/** For supporting the tracking of updates on multiple FTS indexes we need
+to track which FTS indexes need to be updated. For INSERT and DELETE we
+update all fts indexes. */
+struct fts_update_t {
+	doc_id_t	doc_id;		/*!< The doc id affected */
+
+	ib_vector_t*	fts_indexes;	/*!< The FTS indexes that need to be
+					updated. A NULL value means all
+					indexes need to be updated.  This
+					vector is not allocated on the heap
+					and so must be freed explicitly,
+					when we are done with it */
+};
+
+/** Stop word control information. */
+struct fts_stopword_t {
+	ulint		status;		/*!< Status of the stopword tree */
+	ib_alloc_t*	heap;		/*!< The memory allocator to use */
+	ib_rbt_t*	cached_stopword;/*!< This stores all active stopwords */
+	CHARSET_INFO*	charset;	/*!< charset for stopword */
+};
+
+/** The SYNC state of the cache. There is one instance of this struct
+associated with each ADD thread. */
+struct fts_sync_t {
+	trx_t*		trx;		/*!< The transaction used for SYNCing
+					the cache to disk */
+	dict_table_t*	table;		/*!< Table with FTS index(es) */
+	ulint		max_cache_size;	/*!< Max size in bytes of the cache */
+	ibool		cache_full;	/*!< flag, when true it indicates that
+					we need to sync the cache to disk */
+	ulint		lower_index;	/*!< the start index of the doc id
+					vector from where to start adding
+					documents to the FTS cache */
+	ulint		upper_index;	/*!< max index of the doc id vector to
+					add to the FTS cache */
+	ibool		interrupted;	/*!< TRUE if SYNC was interrupted */
+	doc_id_t	min_doc_id;	/*!< The smallest doc id added to the
+					cache. It should equal to
+					doc_ids[lower_index] */
+	doc_id_t	max_doc_id;	/*!< The doc id at which the cache was
+					noted as being full, we use this to
+					set the upper_limit field */
+        ib_time_t	start_time;	/*!< SYNC start time */
+};
+
+/** The cache for the FTS system. It is a memory-based inverted index
+that new entries are added to, until it grows over the configured maximum
+size, at which time its contents are written to the INDEX table. */
+struct fts_cache_t {
+	rw_lock_t	lock;		/*!< lock protecting all access to the
+					memory buffer. FIXME: this needs to
+					be our new upgrade-capable rw-lock */
+
+	rw_lock_t	init_lock;	/*!< lock used for the cache
+					initialization, it has different
+					SYNC level as above cache lock */
+
+	ib_mutex_t	optimize_lock;	/*!< Lock for OPTIMIZE */
+
+	ib_mutex_t	deleted_lock;	/*!< Lock covering deleted_doc_ids */
+
+	ib_mutex_t	doc_id_lock;	/*!< Lock covering Doc ID */
+
+	ib_vector_t*	deleted_doc_ids;/*!< Array of deleted doc ids, each
+					element is of type fts_update_t */
+
+	ib_vector_t*	indexes;	/*!< We store the stats and inverted
+					index for the individual FTS indexes
+					in this vector. Each element is
+					an instance of fts_index_cache_t */
+
+	ib_vector_t*	get_docs;	/*!< information required to read
+					the document from the table. Each
+					element is of type fts_doc_t */
+
+	ulint		total_size;	/*!< total size consumed by the ilist
+					field of all nodes. SYNC is run
+					whenever this gets too big */
+	fts_sync_t*	sync;		/*!< sync structure to sync data to
+					disk */
+	ib_alloc_t*	sync_heap;	/*!< The heap allocator, for indexes
+					and deleted_doc_ids, ie. transient
+					objects, they are recreated after
+					a SYNC is completed */
+
+
+	ib_alloc_t*	self_heap;	/*!< This heap is the heap out of
+					which an instance of the cache itself
+					was created. Objects created using
+					this heap will last for the lifetime
+					of the cache */
+
+	doc_id_t	next_doc_id;	/*!< Next doc id */
+
+	doc_id_t	synced_doc_id;	/*!< Doc ID sync-ed to CONFIG table */
+
+	doc_id_t	first_doc_id;	/*!< first doc id since this table
+					was opened */
+
+	ulint		deleted;	/*!< Number of doc ids deleted since
+					last optimized. This variable is
+					covered by deleted_lock */
+
+	ulint		added;		/*!< Number of doc ids added since last
+					optimized. This variable is covered by
+					the deleted lock */
+
+	fts_stopword_t	stopword_info;	/*!< Cached stopwords for the FTS */
+	mem_heap_t*	cache_heap;	/*!< Cache Heap */
+};
+
+/** Columns of the FTS auxiliary INDEX table */
+struct fts_node_t {
+	doc_id_t	first_doc_id;	/*!< First document id in ilist. */
+
+	doc_id_t	last_doc_id;	/*!< Last document id in ilist. */
+
+	byte*		ilist;		/*!< Binary list of documents & word
+					positions the token appears in.
+					TODO: For now, these are simply
+					ut_malloc'd, but if testing shows
+					that they waste memory unacceptably, a
+					special memory allocator will have
+					to be written */
+
+	ulint		doc_count;	/*!< Number of doc ids in ilist */
+
+	ulint		ilist_size;	/*!< Used size of ilist in bytes. */
+
+	ulint		ilist_size_alloc;
+					/*!< Allocated size of ilist in
+					bytes */
+};
+
+/** A tokenizer word. Contains information about one word. */
+struct fts_tokenizer_word_t {
+	fts_string_t	text;		/*!< Token text. */
+
+	ib_vector_t*	nodes;		/*!< Word node ilists, each element is
+					of type fts_node_t */
+};
+
+/** Word text plus its array of nodes as on disk in FTS index */
+struct fts_word_t {
+	fts_string_t	text;		/*!< Word value in UTF-8 */
+	ib_vector_t*	nodes;		/*!< Nodes read from disk */
+
+	ib_alloc_t*	heap_alloc;	/*!< For handling all allocations */
+};
+
+/** Callback for reading and filtering nodes that are read from FTS index */
+struct fts_fetch_t {
+	void*		read_arg;	/*!< Arg for the sql_callback */
+
+	fts_sql_callback
+			read_record;	/*!< Callback for reading index
+					record */
+	ulint		total_memory;	/*!< Total memory used */
+};
+
+/** For horizontally splitting an FTS auxiliary index */
+struct fts_index_selector_t {
+	ulint		value;		/*!< Character value at which
+					to split */
+
+	const char*	suffix;		/*!< FTS aux index suffix */
+};
+
+/** This type represents a single document. */
+struct fts_doc_t {
+	fts_string_t	text;		/*!< document text */
+
+	ibool		found;		/*!< TRUE if the document was found
+					successfully in the database */
+
+	ib_rbt_t*	tokens;		/*!< This is filled when the document
+					is tokenized. Tokens; indexed by
+					fts_string_t*, cells are of type
+					fts_token_t* */
+
+	ib_alloc_t*	self_heap;	/*!< An instance of this type is
+					allocated from this heap along
+					with any objects that have the
+					same lifespan, most notably
+					the vector of token positions */
+	CHARSET_INFO*	charset;	/*!< Document's charset info */
+};
+
+/** A token and its positions within a document. */
+struct fts_token_t {
+	fts_string_t	text;		/*!< token text */
+
+	ib_vector_t*	positions;	/*!< an array of the positions the
+					token is found in; each item is
+					actually an ulint. */
+};
+
+/** It's defined in fts/fts0fts.c */
+extern const fts_index_selector_t fts_index_selector[];
+
+/******************************************************************//**
+Compare two UTF-8 strings. */
+UNIV_INLINE
+int
+fts_utf8_string_cmp(
+/*================*/
+						/*!< out:
+						< 0 if n1 < n2,
+						0 if n1 == n2,
+						> 0 if n1 > n2 */
+	const void*	p1,			/*!< in: key */
+	const void*	p2);			/*!< in: node */
+
+/******************************************************************//**
+Compare two UTF-8 strings, and return match (0) if
+passed in "key" value equals or is the prefix of the "node" value. */
+UNIV_INLINE
+int
+fts_utf8_string_cmp_prefix(
+/*=======================*/
+						/*!< out:
+						< 0 if n1 < n2,
+						0 if n1 == n2,
+						> 0 if n1 > n2 */
+	const void*	p1,			/*!< in: key */
+	const void*	p2);			/*!< in: node */
+
+/******************************************************************//**
+Compare two fts_trx_row_t instances doc_ids. */
+UNIV_INLINE
+int
+fts_trx_row_doc_id_cmp(
+/*===================*/
+						/*!< out:
+						< 0 if n1 < n2,
+						0 if n1 == n2,
+						> 0 if n1 > n2 */
+	const void*	p1,			/*!< in: id1 */
+	const void*	p2);			/*!< in: id2 */
+
+/******************************************************************//**
+Compare two fts_ranking_t instances doc_ids. */
+UNIV_INLINE
+int
+fts_ranking_doc_id_cmp(
+/*===================*/
+						/*!< out:
+						< 0 if n1 < n2,
+						0 if n1 == n2,
+						> 0 if n1 > n2 */
+	const void*	p1,			/*!< in: id1 */
+	const void*	p2);			/*!< in: id2 */
+
+/******************************************************************//**
+Compare two fts_update_t instances doc_ids. */
+UNIV_INLINE
+int
+fts_update_doc_id_cmp(
+/*==================*/
+						/*!< out:
+						< 0 if n1 < n2,
+						0 if n1 == n2,
+						> 0 if n1 > n2 */
+	const void*	p1,			/*!< in: id1 */
+	const void*	p2);			/*!< in: id2 */
+
+/******************************************************************//**
+Decode and return the integer that was encoded using our VLC scheme.*/
+UNIV_INLINE
+ulint
+fts_decode_vlc(
+/*===========*/
+			/*!< out: value decoded */
+	byte**	ptr);	/*!< in: ptr to decode from, this ptr is
+			incremented by the number of bytes decoded */
+
+/******************************************************************//**
+Duplicate an UTF-8 string. */
+UNIV_INLINE
+void
+fts_utf8_string_dup(
+/*================*/
+						/*!< out: none (void); dst
+						receives a NUL-terminated
+						copy of src allocated
+						from heap */
+	fts_string_t*		dst,		/*!< in: dup to here */
+	const fts_string_t*	src,		/*!< in: src string */
+	mem_heap_t*		heap);		/*!< in: heap to use */
+
+/******************************************************************//**
+Return length of val if it were encoded using our VLC scheme. */
+UNIV_INLINE
+ulint
+fts_get_encoded_len(
+/*================*/
+						/*!< out: length of value
+						 encoded, in bytes */
+	ulint		val);			/*!< in: value to encode */
+
+/******************************************************************//**
+Encode an integer using our VLC scheme and return the length in bytes. */
+UNIV_INLINE
+ulint
+fts_encode_int(
+/*===========*/
+						/*!< out: length of value
+						encoded, in bytes */
+	ulint		val,			/*!< in: value to encode */
+	byte*		buf);			/*!< in: buffer, must have
+						enough space */
+
+/******************************************************************//**
+Decode a UTF-8 character.
+
+http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf:
+
+ Scalar Value              1st Byte 2nd Byte 3rd Byte 4th Byte
+00000000 0xxxxxxx          0xxxxxxx
+00000yyy yyxxxxxx          110yyyyy 10xxxxxx
+zzzzyyyy yyxxxxxx          1110zzzz 10yyyyyy 10xxxxxx
+000uuuzz zzzzyyyy yyxxxxxx 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx
+
+This function decodes UTF-8 sequences up to 6 bytes (31 bits).
+
+On error *ptr will point to the first byte that was not correctly
+decoded. This will hopefully help in resyncing the input. */
+UNIV_INLINE
+ulint
+fts_utf8_decode(
+/*============*/
+						/*!< out: UTF8_ERROR if *ptr
+						did not point to a valid
+						UTF-8 sequence, or the
+						Unicode code point. */
+	const byte**	ptr);			/*!< in/out: pointer to
+						UTF-8 string. The
+						pointer is advanced to
+						the start of the next
+						character. */
+
+/******************************************************************//**
+Lowercase an UTF-8 string. */
+UNIV_INLINE
+void
+fts_utf8_tolower(
+/*=============*/
+	fts_string_t*	str);			/*!< in: string */
+
+/******************************************************************//**
+Get the selected FTS aux INDEX suffix. */
+UNIV_INLINE
+const char*
+fts_get_suffix(
+/*===========*/
+	ulint		selected);		/*!< in: selected index */
+
+/********************************************************************
+Get the number of index selectors. */
+UNIV_INLINE
+ulint
+fts_get_n_selectors(void);
+/*=====================*/
+
+/******************************************************************//**
+Select the FTS auxiliary index for the given string.
+@return the index to use for the string */
+UNIV_INLINE
+ulint
+fts_select_index(
+/*=============*/
+	const CHARSET_INFO*	cs,		/*!< Charset */
+	const byte*		str,		/*!< in: word string */
+	ulint			len);		/*!< in: string length */
+
+/********************************************************************
+Select the next FTS auxiliary index for the given character.
+@return the next index to use for character */
+UNIV_INLINE
+ulint
+fts_select_next_index(
+/*==================*/
+	const CHARSET_INFO*	cs,		/*!< Charset */
+	const byte*		str,		/*!< in: string */
+	ulint			len);		/*!< in: string length */
+
+#ifndef UNIV_NONINL
+#include "fts0types.ic"
+#include "fts0vlc.ic"
+#endif
+
+#endif /* INNOBASE_FTS0TYPES_H */
diff --git a/storage/innobase/include/fts0types.ic b/storage/innobase/include/fts0types.ic
new file mode 100644
index 00000000000..f0dfd023a70
--- /dev/null
+++ b/storage/innobase/include/fts0types.ic
@@ -0,0 +1,388 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fts0types.ic
+Full text search types.
+
+Created 2007-03-27 Sunny Bains
+*******************************************************/
+
+#ifndef INNOBASE_FTS0TYPES_IC
+#define INNOBASE_FTS0TYPES_IC
+
+#include <ctype.h>
+
+#include "rem0cmp.h"
+#include "ha_prototypes.h"
+
+extern const ulint UTF8_ERROR;
+
+/* Determine if a UTF-8 continuation byte is valid: evaluates to non-zero
+when the two most significant bits of b are binary 10, i.e. when b has the
+form 10xxxxxx. A NUL byte therefore never qualifies. */
+#define fts_utf8_is_valid(b) (((b) & 0xC0) == 0x80)
+
+/******************************************************************//**
+Duplicate an UTF-8 string. The byte buffer of the copy is allocated from
+heap with room for one extra byte, which is set to 0, so the copy is
+always NUL terminated. The length and character count are copied as is. */
+UNIV_INLINE
+void
+fts_utf8_string_dup(
+/*================*/
+	fts_string_t*		dst,		/*!< out: dup to here */
+	const fts_string_t*	src,		/*!< in: src string */
+	mem_heap_t*		heap)		/*!< in: heap to use */
+{
+	const ulint	n_bytes = src->f_len;
+
+	/* Reserve one byte more than the payload for the terminator. */
+	dst->f_str = (byte*) mem_heap_alloc(heap, n_bytes + 1);
+
+	memcpy(dst->f_str, src->f_str, n_bytes);
+	dst->f_str[n_bytes] = 0;
+
+	dst->f_len = n_bytes;
+	dst->f_n_char = src->f_n_char;
+}
+
+/******************************************************************//**
+Compare two fts_trx_row_t doc_ids.
+
+The result is derived from explicit comparisons and not from the
+difference of the two ids: when doc_id is wider than int, casting the
+difference to int truncates it, which can yield the wrong sign or 0 for
+two distinct ids.
+@return -1 if id1 < id2, 0 if id1 == id2, 1 if id1 > id2 */
+UNIV_INLINE
+int
+fts_trx_row_doc_id_cmp(
+/*===================*/
+	const void*	p1,			/*!< in: pointer to the first
+						fts_trx_row_t (id1) */
+	const void*	p2)			/*!< in: pointer to the second
+						fts_trx_row_t (id2) */
+{
+	const fts_trx_row_t*	tr1 = (const fts_trx_row_t*) p1;
+	const fts_trx_row_t*	tr2 = (const fts_trx_row_t*) p2;
+
+	if (tr1->doc_id < tr2->doc_id) {
+		return(-1);
+	}
+
+	return(tr1->doc_id > tr2->doc_id ? 1 : 0);
+}
+
+/******************************************************************//**
+Compare two fts_ranking_t doc_ids.
+
+The result is derived from explicit comparisons and not from the
+difference of the two ids: when doc_id is wider than int, casting the
+difference to int truncates it, which can yield the wrong sign or 0 for
+two distinct ids.
+@return -1 if id1 < id2, 0 if id1 == id2, 1 if id1 > id2 */
+UNIV_INLINE
+int
+fts_ranking_doc_id_cmp(
+/*===================*/
+	const void*	p1,			/*!< in: pointer to the first
+						fts_ranking_t (id1) */
+	const void*	p2)			/*!< in: pointer to the second
+						fts_ranking_t (id2) */
+{
+	const fts_ranking_t*	rk1 = (const fts_ranking_t*) p1;
+	const fts_ranking_t*	rk2 = (const fts_ranking_t*) p2;
+
+	if (rk1->doc_id < rk2->doc_id) {
+		return(-1);
+	}
+
+	return(rk1->doc_id > rk2->doc_id ? 1 : 0);
+}
+
+/******************************************************************//**
+Compare two fts_update_t doc_ids.
+
+The result is derived from explicit comparisons and not from the
+difference of the two ids: when doc_id is wider than int, casting the
+difference to int truncates it, which can yield the wrong sign or 0 for
+two distinct ids.
+@return -1 if id1 < id2, 0 if id1 == id2, 1 if id1 > id2 */
+UNIV_INLINE
+int
+fts_update_doc_id_cmp(
+/*==================*/
+	const void*	p1,			/*!< in: pointer to the first
+						fts_update_t (id1) */
+	const void*	p2)			/*!< in: pointer to the second
+						fts_update_t (id2) */
+{
+	const fts_update_t*	up1 = (const fts_update_t*) p1;
+	const fts_update_t*	up2 = (const fts_update_t*) p2;
+
+	if (up1->doc_id < up2->doc_id) {
+		return(-1);
+	}
+
+	return(up1->doc_id > up2->doc_id ? 1 : 0);
+}
+
+
+/******************************************************************//**
+Lowercase a UTF-8 string by handing str->f_str to innobase_casedn_str().
+Only the byte buffer is passed on; str->f_len and str->f_n_char are not
+touched by this function.
+NOTE(review): no length is passed, so the callee presumably relies on
+f_str being NUL terminated and converts it in place - confirm against
+innobase_casedn_str(). */
+UNIV_INLINE
+void
+fts_utf8_tolower(
+/*=============*/
+	fts_string_t*	str)			/*!< in: string whose f_str
+						buffer is lowercased */
+{
+	innobase_casedn_str((char*) str->f_str);
+}
+
+/******************************************************************//**
+Compare two UTF-8 strings. Both arguments are pointers to fts_string_t;
+their byte buffers and byte lengths are handed unchanged to
+cmp_data_data_slow_varchar(), whose result is returned as is.
+@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
+UNIV_INLINE
+int
+fts_utf8_string_cmp(
+/*================*/
+	const void*	p1,			/*!< in: key (fts_string_t) */
+	const void*	p2)			/*!< in: node (fts_string_t) */
+{
+	const fts_string_t* s1 = (const fts_string_t*) p1;
+	const fts_string_t* s2 = (const fts_string_t*) p2;
+
+	return(cmp_data_data_slow_varchar(
+		s1->f_str, s1->f_len, s2->f_str, s2->f_len));
+}
+
+/******************************************************************//**
+Prefix-aware comparison of two UTF-8 strings. Only the first
+min(key length, node length) bytes of both strings are compared with
+cmp_data_data_slow_varchar().
+@return the non-zero result of that comparison if the common part
+differs; otherwise 1 if the key is longer than the node, and 0 if the
+key equals the node or is a prefix of it */
+UNIV_INLINE
+int
+fts_utf8_string_cmp_prefix(
+/*=======================*/
+	const void*	p1,			/*!< in: key */
+	const void*	p2)			/*!< in: node */
+{
+	const fts_string_t*	key = (const fts_string_t*) p1;
+	const fts_string_t*	node = (const fts_string_t*) p2;
+	const ulint		common = ut_min(key->f_len, node->f_len);
+	int			diff;
+
+	diff = cmp_data_data_slow_varchar(
+		key->f_str, common, node->f_str, common);
+
+	if (diff == 0 && key->f_len > node->f_len) {
+		/* The node is a proper prefix of the key: the key
+		sorts after it. */
+		diff = 1;
+	}
+
+	return(diff);
+}
+
+/******************************************************************//**
+Decode a UTF-8 character.
+
+http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf:
+
+ Scalar Value              1st Byte 2nd Byte 3rd Byte 4th Byte
+00000000 0xxxxxxx          0xxxxxxx
+00000yyy yyxxxxxx          110yyyyy 10xxxxxx
+zzzzyyyy yyxxxxxx          1110zzzz 10yyyyyy 10xxxxxx
+000uuuzz zzzzyyyy yyxxxxxx 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx
+
+This function decodes UTF-8 sequences up to 6 bytes (31 bits).
+
+On error *ptr will point to the first byte that was not correctly
+decoded. This will hopefully help in resyncing the input.
+
+No input length is passed in: continuation bytes are consumed one at a
+time and each must pass fts_utf8_is_valid(). A NUL byte fails that test,
+so the reading of continuation bytes stops at a NUL terminator.
+
+NOTE(review): an invalid lead byte (0x80..0xBF, 0xFE, 0xFF) has already
+been consumed when it is rejected, so in that case *ptr ends up one past
+the offending byte.
+NOTE(review): the check for overlong encodings (ch < min_ch) is compiled
+only under UNIV_DEBUG; in other builds an overlong sequence yields its
+decoded value instead of UTF8_ERROR.
+@return UTF8_ERROR if *ptr did not point to a valid
+UTF-8 sequence, or the Unicode code point. */
+UNIV_INLINE
+ulint
+fts_utf8_decode(
+/*============*/
+	const byte**	ptr)			/*!< in/out: pointer to
+						UTF-8 string. The
+						pointer is advanced to
+						the start of the next
+						character. */
+{
+	/* p is the read cursor; ch starts out as the lead byte. */
+	const byte*	p = *ptr;
+	ulint		ch = *p++;
+#ifdef UNIV_DEBUG
+	/* Smallest code point that legitimately needs the sequence
+	length announced by the lead byte. */
+	ulint		min_ch;
+#endif /* UNIV_DEBUG */
+
+	/* Classify the lead byte, keep its payload bits in ch and jump to
+	the label that consumes the right number of continuation bytes.
+	The labels get4..get1 fall through into each other. */
+	if (UNIV_LIKELY(ch < 0x80)) {
+		/* 0xxxxxxx */
+	} else if (UNIV_UNLIKELY(ch < 0xC0)) {
+		/* A continuation byte cannot start a code. */
+		goto err_exit;
+	} else if (ch < 0xE0) {
+		/* 110yyyyy 10xxxxxx */
+		ch &= 0x1F;
+		ut_d(min_ch = 0x80);
+		goto get1;
+	} else if (ch < 0xF0) {
+		/* 1110zzzz 10yyyyyy 10xxxxxx */
+		ch &= 0x0F;
+		ut_d(min_ch = 0x800);
+		goto get2;
+	} else if (ch < 0xF8) {
+		/* 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx */
+		ch &= 0x07;
+		ut_d(min_ch = 0x10000);
+		goto get3;
+	} else if (ch < 0xFC) {
+		/* 111110tt 10uuuuuu 10zzzzzz 10yyyyyy 10xxxxxx */
+		ch &= 0x03;
+		ut_d(min_ch = 0x200000);
+		goto get4;
+	} else if (ch < 0xFE) {
+		/* 1111110s 10tttttt 10uuuuuu 10zzzzzz 10yyyyyy 10xxxxxx */
+		/* The lead byte is not masked in this branch: after the
+		five 6-bit shifts below its marker bits lie above bit 31
+		and are removed by the 0xFFFFFFFF mask further down. */
+		ut_d(min_ch = 0x4000000);
+		if (!fts_utf8_is_valid(*p)) {
+			goto err_exit;
+		}
+		ch <<= 6;
+		ch |= (*p++) & 0x3F;
+get4:
+		if (!fts_utf8_is_valid(*p)) {
+			goto err_exit;
+		}
+		ch <<= 6;
+		ch |= (*p++) & 0x3F;
+get3:
+		if (!fts_utf8_is_valid(*p)) {
+			goto err_exit;
+		}
+		ch <<= 6;
+		ch |= (*p++) & 0x3F;
+get2:
+		if (!fts_utf8_is_valid(*p)) {
+			goto err_exit;
+		}
+		ch <<= 6;
+		ch |= (*p++) & 0x3F;
+get1:
+		if (!fts_utf8_is_valid(*p)) {
+			goto err_exit;
+		}
+		ch <<= 6;
+		ch |= (*p++) & 0x3F;
+
+		/* The following is needed in the 6-byte case
+		when ulint is wider than 32 bits. */
+		ch &= 0xFFFFFFFF;
+
+		/* The code positions U+D800 to U+DFFF (UTF-16 surrogate pairs)
+		and U+FFFE and U+FFFF cannot occur in valid UTF-8. */
+
+		if ( (ch >= 0xD800 && ch <= 0xDFFF)
+#ifdef UNIV_DEBUG
+		     || ch < min_ch
+#endif /* UNIV_DEBUG */
+		     || ch == 0xFFFE || ch == 0xFFFF) {
+
+			ch = UTF8_ERROR;
+		}
+	} else {
+		/* Lead bytes 0xFE and 0xFF land here directly; every
+		failed check above jumps to the label. */
+err_exit:
+		ch = UTF8_ERROR;
+	}
+
+	/* The cursor is published on success and on error alike. */
+	*ptr = p;
+
+	return(ch);
+}
+
+/******************************************************************//**
+Get the first character's code position for FTS index partition */
+extern
+ulint
+innobase_strnxfrm(
+/*==============*/
+        const CHARSET_INFO*	cs,	/*!< in: Character set */
+        const uchar*		p2,	/*!< in: string */
+        const ulint		len2);	/*!< in: string length */
+
+/******************************************************************//**
+Select the FTS auxiliary index for the given character.
+The value obtained from innobase_strnxfrm() is looked up in
+fts_index_selector, which is scanned up to its terminating entry
+(value 0).
+@return the position of the entry whose value equals the looked up
+value; else the position before the first entry with a greater value
+(0 if that is the very first entry); else the position of the last
+entry */
+UNIV_INLINE
+ulint
+fts_select_index(
+/*=============*/
+	const CHARSET_INFO*	cs,	/*!< in: Charset */
+	const byte*		str,	/*!< in: string */
+	ulint			len)	/*!< in: string length */
+{
+	ulint	value = innobase_strnxfrm(cs, str, len);
+	ulint	i;
+
+	for (i = 0; fts_index_selector[i].value != 0; ++i) {
+
+		if (fts_index_selector[i].value == value) {
+			/* Exact hit. */
+			return(i);
+		}
+
+		if (fts_index_selector[i].value > value) {
+			/* Overshot: the previous entry covers value. */
+			return(i > 0 ? i - 1 : 0);
+		}
+	}
+
+	/* value lies beyond every entry: use the last one. */
+	ut_ad(i > 1);
+
+	return(i - 1);
+}
+
+/******************************************************************//**
+Select the next FTS auxiliary index for the given character.
+The value obtained from innobase_strnxfrm() is looked up in
+fts_index_selector, which is scanned up to its terminating entry
+(value 0).
+@return the position after the entry whose value equals the looked up
+value; else the position of the first entry with a greater value; else
+the number of entries */
+UNIV_INLINE
+ulint
+fts_select_next_index(
+/*==================*/
+	const CHARSET_INFO*	cs,	/*!< in: Charset */
+	const byte*		str,	/*!< in: string */
+	ulint			len)	/*!< in: string length */
+{
+	ulint	value = innobase_strnxfrm(cs, str, len);
+	ulint	i;
+
+	for (i = 0; fts_index_selector[i].value != 0; ++i) {
+
+		if (fts_index_selector[i].value == value) {
+			/* Exact hit: the next index follows it. */
+			return(i + 1);
+		}
+
+		if (fts_index_selector[i].value > value) {
+			return(i);
+		}
+	}
+
+	ut_ad(i > 0);
+
+	return(i);
+}
+
+/******************************************************************//**
+Return the selected FTS aux index suffix, i.e. the suffix member of
+fts_index_selector[selected]. The position is not range checked here, so
+the caller must pass a valid position.
+@return the suffix stored for the selected index */
+UNIV_INLINE
+const char*
+fts_get_suffix(
+/*===========*/
+	ulint		selected)	/*!< in: selected index */
+{
+	return(fts_index_selector[selected].suffix);
+}
+
+/******************************************************************//**
+Get the number of index selectors, i.e. the number of entries of
+fts_index_selector that precede the terminating entry (value 0).
+@return The number of selectors */
+UNIV_INLINE
+ulint
+fts_get_n_selectors(void)
+/*=====================*/
+{
+	ulint	n_selectors;
+
+	/* FIXME: This is a hack: the table is re-counted on every call. */
+	for (n_selectors = 0;
+	     fts_index_selector[n_selectors].value != 0;
+	     ++n_selectors) {
+		/* No op */
+	}
+
+	return(n_selectors);
+}
+
+#endif /* INNOBASE_FTS0TYPES_IC */
diff --git a/storage/innobase/include/fts0vlc.ic b/storage/innobase/include/fts0vlc.ic
new file mode 100644
index 00000000000..e79bcf59347
--- /dev/null
+++ b/storage/innobase/include/fts0vlc.ic
@@ -0,0 +1,142 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fts0vlc.ic
+Full text variable length integer encoding/decoding.
+
+Created 2007-03-27 Sunny Bains
+*******************************************************/
+
+#ifndef INNOBASE_FTS0VLC_IC
+#define INNOBASE_FTS0VLC_IC
+
+#include "fts0types.h"
+
+/******************************************************************//**
+Return length of val if it were encoded using our VLC scheme. Every
+encoded byte carries 7 bits of payload and the length is capped at 5.
+FIXME: We will need to be able encode 8 bytes value
+@return length of value encoded, in bytes */
+UNIV_INLINE
+ulint
+fts_get_encoded_len(
+/*================*/
+	ulint	val)	/* in: value to encode */
+{
+	ulint	len = 1;
+
+	/* One more byte for each further group of 7 bits in use.
+	Possibly we should care that on 64-bit machines ulint can
+	contain values that we can't encode in 5 bytes, but
+	fts_encode_int doesn't handle them either so it doesn't much
+	matter. */
+	while (len < 5 && (val >> (7 * len)) != 0) {
+		++len;
+	}
+
+	return(len);
+}
+
+/******************************************************************//**
+Encode an integer using our VLC scheme and return the length in bytes.
+The value is stored most significant 7-bit group first; the last byte
+written is flagged by setting its high bit.
+@return length of value encoded, in bytes */
+UNIV_INLINE
+ulint
+fts_encode_int(
+/*===========*/
+	ulint	val,	/* in: value to encode */
+	byte*	buf)	/* out: buffer, must have enough space */
+{
+	ulint	len;
+	ulint	shift;
+
+	if (val <= 127) {
+		len = 1;
+	} else if (val <= 16383) {
+		len = 2;
+	} else if (val <= 2097151) {
+		len = 3;
+	} else if (val <= 268435455) {
+		len = 4;
+	} else {
+		/* Best to keep the limitations of the 32/64 bit versions
+		identical, at least for the time being. */
+		ut_ad(val <= 4294967295u);
+
+		len = 5;
+	}
+
+	/* The leading byte takes whatever remains above the lower
+	groups and is stored without masking. */
+	shift = 7 * (len - 1);
+	*buf = (byte)(val >> shift);
+
+	/* Each following byte takes the next lower 7-bit group. */
+	while (shift != 0) {
+		shift -= 7;
+		*++buf = (byte)((val >> shift) & 0x7F);
+	}
+
+	/* High-bit on means "last byte in the encoded integer". */
+	*buf |= 0x80;
+
+	return(len);
+}
+
+/******************************************************************//**
+Decode and return the integer that was encoded using our VLC scheme.
+Bytes are consumed up to and including the first one that has its high
+bit set; the low 7 bits of every byte are appended to the result, most
+significant group first.
+@return value decoded */
+UNIV_INLINE
+ulint
+fts_decode_vlc(
+/*===========*/
+	byte**	ptr)	/* in/out: ptr to decode from, this ptr is
+			incremented by the number of bytes decoded */
+{
+	byte*	cursor = *ptr;
+	byte	b = *cursor++;
+	ulint	val = b & 0x7F;
+
+	/* High-bit on means "last byte in the encoded integer". */
+	while (!(b & 0x80)) {
+		b = *cursor++;
+		val = (val << 7) | (b & 0x7F);
+	}
+
+	*ptr = cursor;
+
+	return(val);
+}
+
+#endif
diff --git a/storage/innobase/include/fut0fut.h b/storage/innobase/include/fut0fut.h
new file mode 100644
index 00000000000..851cdb44cdf
--- /dev/null
+++ b/storage/innobase/include/fut0fut.h
@@ -0,0 +1,55 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fut0fut.h
+File-based utilities
+
+Created 12/13/1995 Heikki Tuuri
+***********************************************************************/
+
+
+#ifndef fut0fut_h
+#define fut0fut_h
+
+#include "univ.i"
+
+#include "fil0fil.h"
+#include "mtr0mtr.h"
+
+/********************************************************************//**
+Gets a pointer to a file address and latches the page.
+@return pointer to a byte in a frame; the file page in the frame is
+bufferfixed and latched */
+UNIV_INLINE
+byte*
+fut_get_ptr(
+/*========*/
+	ulint		space,	/*!< in: space id */
+	ulint		zip_size,/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	fil_addr_t	addr,	/*!< in: file address */
+	ulint		rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH */
+	mtr_t*		mtr);	/*!< in: mtr handle */
+
+#ifndef UNIV_NONINL
+#include "fut0fut.ic"
+#endif
+
+#endif
+
diff --git a/storage/innobase/include/fut0fut.ic b/storage/innobase/include/fut0fut.ic
new file mode 100644
index 00000000000..b065b10b9ca
--- /dev/null
+++ b/storage/innobase/include/fut0fut.ic
@@ -0,0 +1,56 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fut0fut.ic
+File-based utilities
+
+Created 12/13/1995 Heikki Tuuri
+***********************************************************************/
+
+#include "sync0rw.h"
+#include "buf0buf.h"
+
+/********************************************************************//**
+Gets a pointer to a file address and latches the page. The page
+addr.page of the tablespace is fetched with buf_page_get() under the
+requested latch, and addr.boffset is added to the start of its frame.
+@return pointer to a byte in a frame; the file page in the frame is
+bufferfixed and latched */
+UNIV_INLINE
+byte*
+fut_get_ptr(
+/*========*/
+	ulint		space,	/*!< in: space id */
+	ulint		zip_size,/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	fil_addr_t	addr,	/*!< in: file address */
+	ulint		rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH */
+	mtr_t*		mtr)	/*!< in: mtr handle */
+{
+	buf_block_t*	block;
+
+	ut_ad(addr.boffset < UNIV_PAGE_SIZE);
+	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
+
+	block = buf_page_get(space, zip_size, addr.page, rw_latch, mtr);
+
+	buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
+
+	return(buf_block_get_frame(block) + addr.boffset);
+}
diff --git a/storage/innobase/include/fut0lst.h b/storage/innobase/include/fut0lst.h
new file mode 100644
index 00000000000..90f9a65d4fa
--- /dev/null
+++ b/storage/innobase/include/fut0lst.h
@@ -0,0 +1,217 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fut0lst.h
+File-based list utilities
+
+Created 11/28/1995 Heikki Tuuri
+***********************************************************************/
+
+#ifndef fut0lst_h
+#define fut0lst_h
+
+#include "univ.i"
+
+#include "fil0fil.h"
+#include "mtr0mtr.h"
+
+
+/* The C 'types' of base node and list node: these should be used to
+write self-documenting code. Of course, the sizeof macro cannot be
+applied to these types! */
+
+typedef	byte	flst_base_node_t;
+typedef	byte	flst_node_t;
+
+/* The physical size of a list base node in bytes */
+#define	FLST_BASE_NODE_SIZE	(4 + 2 * FIL_ADDR_SIZE)
+
+/* The physical size of a list node in bytes */
+#define	FLST_NODE_SIZE		(2 * FIL_ADDR_SIZE)
+
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+Initializes a list base node. */
+UNIV_INLINE
+void
+flst_init(
+/*======*/
+	flst_base_node_t*	base,	/*!< in: pointer to base node */
+	mtr_t*			mtr);	/*!< in: mini-transaction handle */
+/********************************************************************//**
+Adds a node as the last node in a list. */
+UNIV_INTERN
+void
+flst_add_last(
+/*==========*/
+	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
+	flst_node_t*		node,	/*!< in: node to add */
+	mtr_t*			mtr);	/*!< in: mini-transaction handle */
+/********************************************************************//**
+Adds a node as the first node in a list. */
+UNIV_INTERN
+void
+flst_add_first(
+/*===========*/
+	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
+	flst_node_t*		node,	/*!< in: node to add */
+	mtr_t*			mtr);	/*!< in: mini-transaction handle */
+/********************************************************************//**
+Inserts a node after another in a list. */
+UNIV_INTERN
+void
+flst_insert_after(
+/*==============*/
+	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
+	flst_node_t*		node1,	/*!< in: node to insert after */
+	flst_node_t*		node2,	/*!< in: node to add */
+	mtr_t*			mtr);	/*!< in: mini-transaction handle */
+/********************************************************************//**
+Inserts a node before another in a list. */
+UNIV_INTERN
+void
+flst_insert_before(
+/*===============*/
+	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
+	flst_node_t*		node2,	/*!< in: node to insert */
+	flst_node_t*		node3,	/*!< in: node to insert before */
+	mtr_t*			mtr);	/*!< in: mini-transaction handle */
+/********************************************************************//**
+Removes a node. */
+UNIV_INTERN
+void
+flst_remove(
+/*========*/
+	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
+	flst_node_t*		node2,	/*!< in: node to remove */
+	mtr_t*			mtr);	/*!< in: mini-transaction handle */
+/********************************************************************//**
+Cuts off the tail of the list, including the node given. The number of
+nodes which will be removed must be provided by the caller, as this function
+does not measure the length of the tail. */
+UNIV_INTERN
+void
+flst_cut_end(
+/*=========*/
+	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
+	flst_node_t*		node2,	/*!< in: first node to remove */
+	ulint			n_nodes,/*!< in: number of nodes to remove,
+					must be >= 1 */
+	mtr_t*			mtr);	/*!< in: mini-transaction handle */
+/********************************************************************//**
+Cuts off the tail of the list, not including the given node. The number of
+nodes which will be removed must be provided by the caller, as this function
+does not measure the length of the tail. */
+UNIV_INTERN
+void
+flst_truncate_end(
+/*==============*/
+	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
+	flst_node_t*		node2,	/*!< in: first node not to remove */
+	ulint			n_nodes,/*!< in: number of nodes to remove */
+	mtr_t*			mtr);	/*!< in: mini-transaction handle */
+/********************************************************************//**
+Gets list length.
+@return	length */
+UNIV_INLINE
+ulint
+flst_get_len(
+/*=========*/
+	const flst_base_node_t*	base,	/*!< in: pointer to base node */
+	mtr_t*			mtr);	/*!< in: mini-transaction handle */
+/********************************************************************//**
+Gets list first node address.
+@return	file address */
+UNIV_INLINE
+fil_addr_t
+flst_get_first(
+/*===========*/
+	const flst_base_node_t*	base,	/*!< in: pointer to base node */
+	mtr_t*			mtr);	/*!< in: mini-transaction handle */
+/********************************************************************//**
+Gets list last node address.
+@return	file address */
+UNIV_INLINE
+fil_addr_t
+flst_get_last(
+/*==========*/
+	const flst_base_node_t*	base,	/*!< in: pointer to base node */
+	mtr_t*			mtr);	/*!< in: mini-transaction handle */
+/********************************************************************//**
+Gets list next node address.
+@return	file address */
+UNIV_INLINE
+fil_addr_t
+flst_get_next_addr(
+/*===============*/
+	const flst_node_t*	node,	/*!< in: pointer to node */
+	mtr_t*			mtr);	/*!< in: mini-transaction handle */
+/********************************************************************//**
+Gets list prev node address.
+@return	file address */
+UNIV_INLINE
+fil_addr_t
+flst_get_prev_addr(
+/*===============*/
+	const flst_node_t*	node,	/*!< in: pointer to node */
+	mtr_t*			mtr);	/*!< in: mini-transaction handle */
+/********************************************************************//**
+Writes a file address. */
+UNIV_INLINE
+void
+flst_write_addr(
+/*============*/
+	fil_faddr_t*	faddr,	/*!< in: pointer to file faddress */
+	fil_addr_t	addr,	/*!< in: file address */
+	mtr_t*		mtr);	/*!< in: mini-transaction handle */
+/********************************************************************//**
+Reads a file address.
+@return	file address */
+UNIV_INLINE
+fil_addr_t
+flst_read_addr(
+/*===========*/
+	const fil_faddr_t*	faddr,	/*!< in: pointer to file faddress */
+	mtr_t*			mtr);	/*!< in: mini-transaction handle */
+/********************************************************************//**
+Validates a file-based list.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+flst_validate(
+/*==========*/
+	const flst_base_node_t*	base,	/*!< in: pointer to base node of list */
+	mtr_t*			mtr1);	/*!< in: mtr */
+/********************************************************************//**
+Prints info of a file-based list. */
+UNIV_INTERN
+void
+flst_print(
+/*=======*/
+	const flst_base_node_t*	base,	/*!< in: pointer to base node of list */
+	mtr_t*			mtr);	/*!< in: mtr */
+
+
+#ifndef UNIV_NONINL
+#include "fut0lst.ic"
+#endif
+
+#endif /* !UNIV_HOTBACKUP */
+
+#endif
diff --git a/storage/innobase/include/fut0lst.ic b/storage/innobase/include/fut0lst.ic
new file mode 100644
index 00000000000..d18cf21378f
--- /dev/null
+++ b/storage/innobase/include/fut0lst.ic
@@ -0,0 +1,167 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fut0lst.ic
+File-based list utilities
+
+Created 11/28/1995 Heikki Tuuri
+***********************************************************************/
+
+#include "fut0fut.h"
+#include "mtr0log.h"
+#include "buf0buf.h"
+
+/* We define the field offsets of a node for the list */
+#define FLST_PREV	0	/* 6-byte address of the previous list element;
+				the page part of address is FIL_NULL, if no
+				previous element */
+#define FLST_NEXT	FIL_ADDR_SIZE	/* 6-byte address of the next
+				list element; the page part of address
+				is FIL_NULL, if no next element */
+
+/* We define the field offsets of a base node for the list */
+#define FLST_LEN	0	/* 32-bit list length field */
+#define	FLST_FIRST	4	/* 6-byte address of the first element
+				of the list; undefined if empty list */
+#define	FLST_LAST	(4 + FIL_ADDR_SIZE) /* 6-byte address of the
+				last element of the list; undefined
+				if empty list */
+
+/********************************************************************//**
+Writes a file address: the page number goes into the 4-byte field at
+offset FIL_ADDR_PAGE and the byte offset into the 2-byte field at offset
+FIL_ADDR_BYTE of faddr, both through mlog_write_ulint(). */
+UNIV_INLINE
+void
+flst_write_addr(
+/*============*/
+	fil_faddr_t*	faddr,	/*!< in: pointer to the file address
+				field to overwrite */
+	fil_addr_t	addr,	/*!< in: file address to store */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle */
+{
+	ut_ad(faddr && mtr);
+	/* The page that holds faddr must be registered in mtr with
+	MTR_MEMO_PAGE_X_FIX. */
+	ut_ad(mtr_memo_contains_page(mtr, faddr, MTR_MEMO_PAGE_X_FIX));
+	/* Accept either the null page or a byte offset of at least
+	FIL_PAGE_DATA. */
+	ut_a(addr.page == FIL_NULL || addr.boffset >= FIL_PAGE_DATA);
+	/* The field written to must itself lie at an offset of at least
+	FIL_PAGE_DATA within its page. */
+	ut_a(ut_align_offset(faddr, UNIV_PAGE_SIZE) >= FIL_PAGE_DATA);
+
+	mlog_write_ulint(faddr + FIL_ADDR_PAGE, addr.page, MLOG_4BYTES, mtr);
+	mlog_write_ulint(faddr + FIL_ADDR_BYTE, addr.boffset,
+			 MLOG_2BYTES, mtr);
+}
+
+/********************************************************************//**
+Reads a file address: the page number from the 4-byte field at offset
+FIL_ADDR_PAGE and the byte offset from the 2-byte field at offset
+FIL_ADDR_BYTE of faddr, both through mtr_read_ulint(). The value read is
+validated before it is returned.
+@return	file address */
+UNIV_INLINE
+fil_addr_t
+flst_read_addr(
+/*===========*/
+	const fil_faddr_t*	faddr,	/*!< in: pointer to the file address
+					field to read */
+	mtr_t*			mtr)	/*!< in: mini-transaction handle */
+{
+	fil_addr_t	addr;
+
+	ut_ad(faddr && mtr);
+
+	addr.page = mtr_read_ulint(faddr + FIL_ADDR_PAGE, MLOG_4BYTES, mtr);
+	addr.boffset = mtr_read_ulint(faddr + FIL_ADDR_BYTE, MLOG_2BYTES,
+				      mtr);
+	/* The stored address must be the null page or carry a byte
+	offset of at least FIL_PAGE_DATA. */
+	ut_a(addr.page == FIL_NULL || addr.boffset >= FIL_PAGE_DATA);
+	/* The field read from must itself lie at an offset of at least
+	FIL_PAGE_DATA within its page. */
+	ut_a(ut_align_offset(faddr, UNIV_PAGE_SIZE) >= FIL_PAGE_DATA);
+	return(addr);
+}
+
+/********************************************************************//**
+Initializes a list base node: the 4-byte length field at FLST_LEN is set
+to 0 and both the FLST_FIRST and the FLST_LAST address are set to
+fil_addr_null. */
+UNIV_INLINE
+void
+flst_init(
+/*======*/
+	flst_base_node_t*	base,	/*!< in: pointer to base node */
+	mtr_t*			mtr)	/*!< in: mini-transaction handle */
+{
+	/* The page that holds the base node must be registered in mtr
+	with MTR_MEMO_PAGE_X_FIX. */
+	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
+
+	mlog_write_ulint(base + FLST_LEN, 0, MLOG_4BYTES, mtr);
+	flst_write_addr(base + FLST_FIRST, fil_addr_null, mtr);
+	flst_write_addr(base + FLST_LAST, fil_addr_null, mtr);
+}
+
+/********************************************************************//**
+Gets list length, read as a 4-byte value from offset FLST_LEN of the
+base node.
+@return	length */
+UNIV_INLINE
+ulint
+flst_get_len(
+/*=========*/
+	const flst_base_node_t*	base,	/*!< in: pointer to base node */
+	mtr_t*			mtr)	/*!< in: mini-transaction handle */
+{
+	return(mtr_read_ulint(base + FLST_LEN, MLOG_4BYTES, mtr));
+}
+
+/********************************************************************//**
+Gets list first node address, read with flst_read_addr() from offset
+FLST_FIRST of the base node.
+@return	file address */
+UNIV_INLINE
+fil_addr_t
+flst_get_first(
+/*===========*/
+	const flst_base_node_t*	base,	/*!< in: pointer to base node */
+	mtr_t*			mtr)	/*!< in: mini-transaction handle */
+{
+	return(flst_read_addr(base + FLST_FIRST, mtr));
+}
+
+/********************************************************************//**
+Gets list last node address, read with flst_read_addr() from offset
+FLST_LAST of the base node.
+@return	file address */
+UNIV_INLINE
+fil_addr_t
+flst_get_last(
+/*==========*/
+	const flst_base_node_t*	base,	/*!< in: pointer to base node */
+	mtr_t*			mtr)	/*!< in: mini-transaction handle */
+{
+	return(flst_read_addr(base + FLST_LAST, mtr));
+}
+
+/********************************************************************//**
+Gets list next node address, read with flst_read_addr() from offset
+FLST_NEXT of the node.
+@return	file address */
+UNIV_INLINE
+fil_addr_t
+flst_get_next_addr(
+/*===============*/
+	const flst_node_t*	node,	/*!< in: pointer to node */
+	mtr_t*			mtr)	/*!< in: mini-transaction handle */
+{
+	return(flst_read_addr(node + FLST_NEXT, mtr));
+}
+
+/********************************************************************//**
+Gets list prev node address, read with flst_read_addr() from offset
+FLST_PREV of the node.
+@return	file address */
+UNIV_INLINE
+fil_addr_t
+flst_get_prev_addr(
+/*===============*/
+	const flst_node_t*	node,	/*!< in: pointer to node */
+	mtr_t*			mtr)	/*!< in: mini-transaction handle */
+{
+	return(flst_read_addr(node + FLST_PREV, mtr));
+}
diff --git a/storage/innobase/include/ha0ha.h b/storage/innobase/include/ha0ha.h
new file mode 100644
index 00000000000..7351b407e8c
--- /dev/null
+++ b/storage/innobase/include/ha0ha.h
@@ -0,0 +1,265 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/ha0ha.h
+The hash table with external chains
+
+Created 8/18/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef ha0ha_h
+#define ha0ha_h
+
+#include "univ.i"
+
+#include "hash0hash.h"
+#include "page0types.h"
+#include "buf0types.h"
+#include "rem0types.h"
+
+/*************************************************************//**
+Looks for an element in a hash table by its fold value.
+@return pointer to the data of the first node in the chain of fold
+whose fold value equals fold, NULL if not found */
+UNIV_INLINE
+const rec_t*
+ha_search_and_get_data(
+/*===================*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold);	/*!< in: folded value of the searched data */
+/*********************************************************//**
+Looks for an element when we know the pointer to the data and updates
+the pointer to data if found. Call it via ha_search_and_update_if_found().
+@return TRUE if found */
+UNIV_INTERN
+ibool
+ha_search_and_update_if_found_func(
+/*===============================*/
+	hash_table_t*	table,	/*!< in/out: hash table */
+	ulint		fold,	/*!< in: folded value of the searched data */
+	const rec_t*	data,	/*!< in: pointer to the data */
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+	buf_block_t*	new_block,/*!< in: block containing new_data */
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+	const rec_t*	new_data);/*!< in: new pointer to the data */
+
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+/** Looks for an element by data pointer and updates the pointer if found.
+@return TRUE if found
+@param table		in/out: hash table
+@param fold		in: folded value of the searched data
+@param data		in: pointer to the data
+@param new_block	in: block containing new_data
+@param new_data		in: new pointer to the data */
+# define ha_search_and_update_if_found(table,fold,data,new_block,new_data) \
+	ha_search_and_update_if_found_func(table,fold,data,new_block,new_data)
+#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+/** Looks for an element by data pointer and updates the pointer if found.
+@return TRUE if found
+@param table		in/out: hash table
+@param fold		in: folded value of the searched data
+@param data		in: pointer to the data
+@param new_block	ignored: block containing new_data
+@param new_data		in: new pointer to the data */
+# define ha_search_and_update_if_found(table,fold,data,new_block,new_data) \
+	ha_search_and_update_if_found_func(table,fold,data,new_data)
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+/*************************************************************//**
+Creates a hash table with at least n array cells.  The actual number
+of cells is chosen to be a prime number slightly bigger than n.
+@return	own: created table */
+UNIV_INTERN
+hash_table_t*
+ha_create_func(
+/*===========*/
+	ulint	n,		/*!< in: number of array cells */
+#ifdef UNIV_SYNC_DEBUG
+	ulint	mutex_level,	/*!< in: level of the mutexes in the latching
+				order: only present with UNIV_SYNC_DEBUG */
+#endif /* UNIV_SYNC_DEBUG */
+	ulint	n_mutexes,	/*!< in: number of mutexes to protect the
+				hash table: must be a power of 2, or 0 */
+	ulint	type);		/*!< in: type of data structure for which
+				the memory heap is going to be used e.g.:
+				MEM_HEAP_FOR_BTR_SEARCH or
+				MEM_HEAP_FOR_PAGE_HASH */
+#ifdef UNIV_SYNC_DEBUG
+/** Creates a hash table.
+@return		own: created table
+@param n_c	in: number of array cells.  The actual number of cells is
+chosen to be a slightly bigger prime number.
+@param n_m	in: number of mutexes protecting the table: power of 2, or 0
+@param type	in: type of data structure the memory heap is used for
+@param level	in: level of the mutexes in the latching order */
+# define ha_create(n_c,n_m,type,level) ha_create_func(n_c,level,n_m,type)
+#else /* UNIV_SYNC_DEBUG */
+/** Creates a hash table.
+@return		own: created table
+@param n_c	in: number of array cells.  The actual number of cells is
+chosen to be a slightly bigger prime number.
+@param n_m	in: number of mutexes protecting the table: power of 2, or 0
+@param type	in: type of data structure the memory heap is used for
+@param level	ignored: level of the mutexes in the latching order */
+# define ha_create(n_c,n_m,type,level) ha_create_func(n_c,n_m,type)
+#endif /* UNIV_SYNC_DEBUG */
+
+/*************************************************************//**
+Empties a hash table and frees its memory heaps. */
+UNIV_INTERN
+void
+ha_clear(
+/*=====*/
+	hash_table_t*	table);	/*!< in, own: hash table */
+
+/*************************************************************//**
+Inserts an entry into a hash table. If an entry with the same fold number
+is found, its node is updated to point to the new data, and no new node
+is inserted. Call it via ha_insert_for_fold().
+@return	TRUE if succeed, FALSE if no more memory could be allocated */
+UNIV_INTERN
+ibool
+ha_insert_for_fold_func(
+/*====================*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold,	/*!< in: folded value of data; if a node with
+				the same fold value already exists, it is
+				updated to point to the new data, and no new
+				node is created! */
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+	buf_block_t*	block,	/*!< in: buffer block containing the data */
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+	const rec_t*	data);	/*!< in: data, must not be NULL */
+
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+/**
+Inserts an entry into a hash table. If an entry with the same fold number
+is found, its node is updated to point to the new data, and no new node
+is inserted. MONITOR_ADAPTIVE_HASH_ROW_ADDED is incremented in any case.
+This is a statement macro: the ibool result of the function is discarded.
+@param t	in: hash table
+@param f	in: folded value of data
+@param b	in: buffer block containing the data
+@param d	in: data, must not be NULL */
+# define ha_insert_for_fold(t,f,b,d) 	do {		\
+	ha_insert_for_fold_func(t,f,b,d);		\
+	MONITOR_INC(MONITOR_ADAPTIVE_HASH_ROW_ADDED);	\
+} while(0)
+#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+/**
+Inserts an entry into a hash table. If an entry with the same fold number
+is found, its node is updated to point to the new data, and no new node
+is inserted. MONITOR_ADAPTIVE_HASH_ROW_ADDED is incremented in any case.
+This is a statement macro: the ibool result of the function is discarded.
+@param t	in: hash table
+@param f	in: folded value of data
+@param b	ignored: buffer block containing the data
+@param d	in: data, must not be NULL */
+# define ha_insert_for_fold(t,f,b,d)	do {		\
+	ha_insert_for_fold_func(t,f,d);			\
+	MONITOR_INC(MONITOR_ADAPTIVE_HASH_ROW_ADDED);	\
+} while (0)
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+
+/*********************************************************//**
+Looks for an element when we know the pointer to the data and deletes
+it from the hash table if found.
+@return	TRUE if found and deleted, FALSE if not found */
+UNIV_INLINE
+ibool
+ha_search_and_delete_if_found(
+/*==========================*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold,	/*!< in: folded value of the searched data */
+	const rec_t*	data);	/*!< in: pointer to the data */
+#ifndef UNIV_HOTBACKUP
+/*****************************************************************//**
+Removes from the chain determined by fold all nodes whose data pointer
+points to the page given. */
+UNIV_INTERN
+void
+ha_remove_all_nodes_to_page(
+/*========================*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold,	/*!< in: fold value determining the chain */
+	const page_t*	page);	/*!< in: buffer page */
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+/*************************************************************//**
+Validates a given range of the cells in a hash table.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+ha_validate(
+/*========*/
+	hash_table_t*	table,		/*!< in: hash table */
+	ulint		start_index,	/*!< in: start index */
+	ulint		end_index);	/*!< in: end index */
+#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
+/*************************************************************//**
+Prints info of a hash table to the given file. */
+UNIV_INTERN
+void
+ha_print_info(
+/*==========*/
+	FILE*		file,	/*!< in: file where to print */
+	hash_table_t*	table);	/*!< in: hash table */
+#endif /* !UNIV_HOTBACKUP */
+
+/** The hash table external chain node, singly linked through next */
+struct ha_node_t {
+	ha_node_t*	next;	/*!< next chain node or NULL if none */
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+	buf_block_t*	block;	/*!< buffer block containing the data, or NULL */
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+	const rec_t*	data;	/*!< pointer to the data */
+	ulint		fold;	/*!< fold value for the data */
+};
+
+#ifdef UNIV_DEBUG
+/********************************************************************//**
+Asserts that the synchronization object protecting a possible change in
+the hash table is held: a mutex must be owned, an rw-lock must be held in
+exclusive mode. Without UNIV_DEBUG this is a macro that expands to
+nothing. */
+UNIV_INLINE
+void
+hash_assert_can_modify(
+/*===================*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold);	/*!< in: fold value */
+/********************************************************************//**
+Asserts that the synchronization object needed by a hash search is held:
+a mutex must be owned, an rw-lock must be held in x-mode or s-mode.
+Without UNIV_DEBUG this is a macro that expands to nothing. */
+UNIV_INLINE
+void
+hash_assert_can_search(
+/*===================*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold);	/*!< in: fold value */
+#else /* UNIV_DEBUG */
+#define hash_assert_can_modify(t, f)
+#define hash_assert_can_search(t, f)
+#endif /* UNIV_DEBUG */
+
+
+#ifndef UNIV_NONINL
+#include "ha0ha.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/ha0ha.ic b/storage/innobase/include/ha0ha.ic
new file mode 100644
index 00000000000..c478ff54303
--- /dev/null
+++ b/storage/innobase/include/ha0ha.ic
@@ -0,0 +1,246 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/ha0ha.ic
+The hash table with external chains
+
+Created 8/18/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "ut0rnd.h"
+#include "mem0mem.h"
+#include "btr0types.h"
+
+/***********************************************************//**
+Deletes a hash node. Declared here for ha_search_and_delete_if_found(). */
+UNIV_INTERN
+void
+ha_delete_hash_node(
+/*================*/
+	hash_table_t*	table,		/*!< in: hash table */
+	ha_node_t*	del_node);	/*!< in: node to be deleted */
+
+/** Gets the data pointer stored in a hash chain node.
+@return	pointer to the data */
+UNIV_INLINE
+const rec_t*
+ha_node_get_data(
+/*=============*/
+	const ha_node_t*	node)	/*!< in: hash chain node */
+{
+	const rec_t*	data = node->data;
+	return(data);
+}
+
+/******************************************************************//**
+Sets the data pointer (and, in debug builds, the block) of a hash node. */
+UNIV_INLINE
+void
+ha_node_set_data_func(
+/*==================*/
+	ha_node_t*	node,	/*!< in: hash chain node */
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+	buf_block_t*	block,	/*!< in: buffer block containing the data */
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+	const rec_t*	data)	/*!< in: pointer to the data */
+{
+	node->data = data;
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+	node->block = block;
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+}
+
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+/** Sets hash node data.
+@param n	in: hash chain node
+@param b	in: buffer block containing the data
+@param d	in: pointer to the data */
+# define ha_node_set_data(n,b,d) ha_node_set_data_func(n,b,d)
+#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+/** Sets hash node data.
+@param n	in: hash chain node
+@param b	ignored: buffer block containing the data
+@param d	in: pointer to the data */
+# define ha_node_set_data(n,b,d) ha_node_set_data_func(n,d)
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+
+/** Gets the node that follows the given one in its hash chain.
+@return	next node, NULL if none */
+UNIV_INLINE
+ha_node_t*
+ha_chain_get_next(
+/*==============*/
+	ha_node_t*	node)	/*!< in: hash chain node */
+{
+	ha_node_t*	next = node->next;
+	return(next);
+}
+
+/******************************************************************//**
+Gets the head of the hash chain that a fold value maps to.
+@return	first node, NULL if none */
+UNIV_INLINE
+ha_node_t*
+ha_chain_get_first(
+/*===============*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold)	/*!< in: fold value determining the chain */
+{
+	ulint	cell_no = hash_calc_hash(fold, table);
+	return((ha_node_t*) hash_get_nth_cell(table, cell_no)->node);
+}
+
+#ifdef UNIV_DEBUG
+/********************************************************************//**
+Asserts that the caller holds the synchronization object of the chain of
+fold strongly enough to modify the hash table: a mutex must be owned; an
+rw-lock must be held in exclusive mode, which is only checked when
+UNIV_SYNC_DEBUG is defined. */
+UNIV_INLINE
+void
+hash_assert_can_modify(
+/*===================*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold)	/*!< in: fold value */
+{
+	if (table->type == HASH_TABLE_SYNC_RW_LOCK) {
+# ifdef UNIV_SYNC_DEBUG
+		ut_ad(rw_lock_own(hash_get_lock(table, fold), RW_LOCK_EX));
+# endif
+	} else if (table->type == HASH_TABLE_SYNC_MUTEX) {
+		ut_ad(mutex_own(hash_get_mutex(table, fold)));
+	} else {
+		/* only unsynchronized tables may reach this point */
+		ut_ad(table->type == HASH_TABLE_SYNC_NONE);
+	}
+}
+
+/********************************************************************//**
+Asserts that the caller holds the synchronization object of the chain of
+fold as a search requires: a mutex must be owned; an rw-lock must be held
+in x-mode or s-mode, which is only checked with UNIV_SYNC_DEBUG. */
+UNIV_INLINE
+void
+hash_assert_can_search(
+/*===================*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold)	/*!< in: fold value */
+{
+	if (table->type == HASH_TABLE_SYNC_RW_LOCK) {
+# ifdef UNIV_SYNC_DEBUG
+		rw_lock_t* latch = hash_get_lock(table, fold);
+		ut_ad(rw_lock_own(latch, RW_LOCK_EX)
+		      || rw_lock_own(latch, RW_LOCK_SHARED));
+# endif
+	} else if (table->type == HASH_TABLE_SYNC_MUTEX) {
+		ut_ad(mutex_own(hash_get_mutex(table, fold)));
+	} else {
+		ut_ad(table->type == HASH_TABLE_SYNC_NONE);
+	}
+}
+#endif /* UNIV_DEBUG */
+
+/*************************************************************//**
+Scans the hash chain of fold for a node with exactly that fold value.
+@return data pointer of the first such node in the chain, NULL if the
+chain holds no node with that fold value */
+UNIV_INLINE
+const rec_t*
+ha_search_and_get_data(
+/*===================*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold)	/*!< in: folded value of the searched data */
+{
+	ha_node_t*	cur;
+
+	hash_assert_can_search(table, fold);
+	ut_ad(btr_search_enabled);
+
+	for (cur = ha_chain_get_first(table, fold);
+	     cur != NULL;
+	     cur = ha_chain_get_next(cur)) {
+		if (cur->fold != fold) {
+			continue;
+		}
+
+		/* the first node with a matching fold value wins */
+		return(cur->data);
+	}
+
+	return(NULL);
+}
+
+/*********************************************************//**
+Scans the hash chain of fold for the node holding a given data pointer.
+@return	pointer to the hash table node, NULL if not found in the table */
+UNIV_INLINE
+ha_node_t*
+ha_search_with_data(
+/*================*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold,	/*!< in: folded value of the searched data */
+	const rec_t*	data)	/*!< in: pointer to the data */
+{
+	ha_node_t*	cur;
+
+	hash_assert_can_search(table, fold);
+
+	ut_ad(btr_search_enabled);
+
+	cur = ha_chain_get_first(table, fold);
+
+	/* Advance along the chain until it is exhausted (cur becomes
+	NULL) or a node holding the given data pointer is reached. */
+	while (cur != NULL && cur->data != data) {
+
+		cur = ha_chain_get_next(cur);
+	}
+
+	/* cur is the matching node, or NULL if there was none */
+
+	return(cur);
+}
+
+/*********************************************************//**
+Finds the node in the chain of fold that holds the given data pointer
+and, if there is one, deletes it from the hash table.
+@return	TRUE if a node was found and deleted, FALSE otherwise */
+UNIV_INLINE
+ibool
+ha_search_and_delete_if_found(
+/*==========================*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold,	/*!< in: folded value of the searched data */
+	const rec_t*	data)	/*!< in: pointer to the data */
+{
+	ha_node_t*	found;
+
+	hash_assert_can_modify(table, fold);
+	ut_ad(btr_search_enabled);
+
+	found = ha_search_with_data(table, fold, data);
+
+	if (found == NULL) {
+		return(FALSE);
+	}
+
+	ha_delete_hash_node(table, found);
+
+	return(TRUE);
+}
diff --git a/storage/innobase/include/ha0storage.h b/storage/innobase/include/ha0storage.h
new file mode 100644
index 00000000000..0073930b502
--- /dev/null
+++ b/storage/innobase/include/ha0storage.h
@@ -0,0 +1,140 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/ha0storage.h
+Hash storage.
+Provides a data structure that stores chunks of data in
+its own storage, avoiding duplicates.
+
+Created September 22, 2007 Vasil Dimov
+*******************************************************/
+
+#ifndef ha0storage_h
+#define ha0storage_h
+
+#include "univ.i"
+
+/** Default initial heap size, used by ha_storage_create() when 0 is
+passed. More memory is allocated later when/if it is needed. */
+#define HA_STORAGE_DEFAULT_HEAP_BYTES	1024
+
+/** Default number of hash cells, used by ha_storage_create() when 0 is
+passed. It is a constant per ha_storage's lifetime. */
+#define HA_STORAGE_DEFAULT_HASH_CELLS	4096
+
+/** Hash storage (the struct is defined in ha0storage.ic) */
+struct ha_storage_t;
+
+/*******************************************************************//**
+Creates a hash storage. If any of the parameters is 0, then the
+corresponding HA_STORAGE_DEFAULT_* value is used.
+@return	own: hash storage */
+UNIV_INLINE
+ha_storage_t*
+ha_storage_create(
+/*==============*/
+	ulint	initial_heap_bytes,	/*!< in: initial heap's size */
+	ulint	initial_hash_cells);	/*!< in: initial number of cells
+					in the hash table */
+
+/*******************************************************************//**
+Copies data into the storage and returns a pointer to the copy. If the
+same data chunk is already present, then pointer to it is returned.
+Data chunks are considered to be equal if len1 == len2 and
+memcmp(data1, data2, len1) == 0. If "data" is not present (and thus
+data_len bytes need to be allocated) and the size of storage is going to
+become more than "memlim" then "data" is not added and NULL is returned.
+To disable this behavior "memlim" can be set to 0, which stands for
+"no limit".
+@return	pointer to the copy, or NULL if "memlim" would be exceeded */
+UNIV_INTERN
+const void*
+ha_storage_put_memlim(
+/*==================*/
+	ha_storage_t*	storage,	/*!< in/out: hash storage */
+	const void*	data,		/*!< in: data to store */
+	ulint		data_len,	/*!< in: data length */
+	ulint		memlim);	/*!< in: memory limit to obey */
+
+/*******************************************************************//**
+Same as ha_storage_put_memlim() but without memory limit.
+@param storage	in/out: hash storage
+@param data	in: data to store
+@param data_len	in: data length
+@return		pointer to the copy of the data */
+#define ha_storage_put(storage, data, data_len)	\
+	ha_storage_put_memlim((storage), (data), (data_len), 0)
+
+/*******************************************************************//**
+Copies string into the storage and returns a pointer to the copy. If the
+same string is already present, then pointer to it is returned.
+Strings are considered to be equal if strcmp(str1, str2) == 0.
+@param storage	in/out: hash storage
+@param str	in: string to put; the macro evaluates it twice
+@return		pointer to the copy of the string */
+#define ha_storage_put_str(storage, str)	\
+	((const char*) ha_storage_put((storage), (str), strlen(str) + 1))
+
+/*******************************************************************//**
+Copies string into the storage and returns a pointer to the copy obeying
+a memory limit.
+If the same string is already present, then pointer to it is returned.
+Strings are considered to be equal if strcmp(str1, str2) == 0.
+@param storage	in/out: hash storage
+@param str	in: string to put; the macro evaluates it twice
+@param memlim	in: memory limit to obey
+@return		pointer to the copy of the string, NULL if over memlim */
+#define ha_storage_put_str_memlim(storage, str, memlim)	\
+	((const char*) ha_storage_put_memlim((storage), (str),	\
+					     strlen(str) + 1, (memlim)))
+
+/*******************************************************************//**
+Empties a hash storage, freeing memory occupied by data chunks.
+This invalidates any pointers previously returned by ha_storage_put().
+The storage stays usable through the pointer written back to *storage. */
+UNIV_INLINE
+void
+ha_storage_empty(
+/*=============*/
+	ha_storage_t**	storage);	/*!< in/out: hash storage */
+
+/*******************************************************************//**
+Frees a hash storage and everything it contains; it cannot be used after
+this call.
+This invalidates any pointers previously returned by ha_storage_put(). */
+UNIV_INLINE
+void
+ha_storage_free(
+/*============*/
+	ha_storage_t*	storage);	/*!< in, own: hash storage */
+
+/*******************************************************************//**
+Gets the size of the memory used by a storage.
+@return	bytes used by the heap, the hash table struct and its cells */
+UNIV_INLINE
+ulint
+ha_storage_get_size(
+/*================*/
+	const ha_storage_t*	storage);	/*!< in: hash storage */
+
+#ifndef UNIV_NONINL
+#include "ha0storage.ic"
+#endif
+
+#endif /* ha0storage_h */
diff --git a/storage/innobase/include/ha0storage.ic b/storage/innobase/include/ha0storage.ic
new file mode 100644
index 00000000000..7150ca045ec
--- /dev/null
+++ b/storage/innobase/include/ha0storage.ic
@@ -0,0 +1,146 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/ha0storage.ic
+Hash storage.
+Provides a data structure that stores chunks of data in
+its own storage, avoiding duplicates.
+
+Created September 24, 2007 Vasil Dimov
+*******************************************************/
+
+#include "univ.i"
+#include "ha0storage.h"
+#include "hash0hash.h"
+#include "mem0mem.h"
+
+/** Hash storage: a memory heap plus a hash table used to avoid duplicates */
+struct ha_storage_t {
+	mem_heap_t*	heap;	/*!< memory heap from which memory is
+				allocated */
+	hash_table_t*	hash;	/*!< hash table used to avoid
+				duplicates */
+};
+
+/** Objects of this type are stored in ha_storage_t, linked in hash chains */
+struct ha_storage_node_t {
+	ulint			data_len;/*!< length of the data */
+	const void*		data;	/*!< pointer to data */
+	ha_storage_node_t*	next;	/*!< next node in hash chain */
+};
+
+/*******************************************************************//**
+Builds a new hash storage: a memory heap holding the storage object
+itself and a hash table. A parameter equal to 0 selects its default.
+@return	own: hash storage */
+UNIV_INLINE
+ha_storage_t*
+ha_storage_create(
+/*==============*/
+	ulint	initial_heap_bytes,	/*!< in: initial heap's size */
+	ulint	initial_hash_cells)	/*!< in: initial number of cells
+					in the hash table */
+{
+	ha_storage_t*	storage;
+	mem_heap_t*	heap;
+	ulint		heap_bytes;
+	ulint		n_cells;
+
+	/* a zero argument selects the corresponding
+	HA_STORAGE_DEFAULT_* value */
+	heap_bytes = initial_heap_bytes != 0
+		? initial_heap_bytes
+		: HA_STORAGE_DEFAULT_HEAP_BYTES;
+
+	n_cells = initial_hash_cells != 0
+		? initial_hash_cells
+		: HA_STORAGE_DEFAULT_HASH_CELLS;
+
+	/* the ha_storage_t object itself is allocated from its own
+	heap, hence the extra sizeof() in the initial heap size */
+	heap = mem_heap_create(sizeof(*storage) + heap_bytes);
+
+	storage = (ha_storage_t*) mem_heap_alloc(heap, sizeof(*storage));
+
+	storage->hash = hash_create(n_cells);
+	storage->heap = heap;
+
+	return(storage);
+}
+
+/*******************************************************************//**
+Discards all data chunks of a hash storage while keeping the storage
+usable. Pointers previously returned by ha_storage_put() become invalid,
+and *storage is replaced by a newly allocated storage object. */
+UNIV_INLINE
+void
+ha_storage_empty(
+/*=============*/
+	ha_storage_t**	storage)	/*!< in/out: hash storage */
+{
+	mem_heap_t*	heap = (*storage)->heap;
+	hash_table_t*	hash = (*storage)->hash;
+	ha_storage_t*	fresh;
+
+	hash_table_clear(hash);
+	mem_heap_empty(heap);
+
+	/* re-create the storage object, which ha_storage_create()
+	allocated from this same heap */
+	fresh = (ha_storage_t*) mem_heap_alloc(heap, sizeof(*fresh));
+	fresh->heap = heap;
+	fresh->hash = hash;
+	*storage = fresh;
+}
+
+/** Frees a hash storage and everything it contains. The storage and any
+pointers previously returned by ha_storage_put() become invalid. */
+UNIV_INLINE
+void
+ha_storage_free(
+/*============*/
+	ha_storage_t*	storage)	/*!< in, own: hash storage */
+{
+	mem_heap_t*	heap = storage->heap;
+	hash_table_t*	hash = storage->hash;
+
+	/* storage is allocated from heap, so the heap is freed last */
+	hash_table_free(hash);
+	mem_heap_free(heap);
+}
+
+/*******************************************************************//**
+Computes the memory footprint of a storage: its heap plus its hash table.
+@return	bytes used */
+UNIV_INLINE
+ulint
+ha_storage_get_size(
+/*================*/
+	const ha_storage_t*	storage)	/*!< in: hash storage */
+{
+	ulint	heap_bytes = mem_heap_get_size(storage->heap);
+	ulint	n_cells = hash_get_n_cells(storage->hash);
+
+	/* The hash table is counted as its descriptor plus its cell
+	array; this assumes hash->heap and hash->heaps are NULL. */
+
+	return(heap_bytes
+	       + sizeof(hash_table_t)
+	       + sizeof(hash_cell_t) * n_cells);
+}
diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h
new file mode 100644
index 00000000000..fa202aa773e
--- /dev/null
+++ b/storage/innobase/include/ha_prototypes.h
@@ -0,0 +1,596 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/ha_prototypes.h
+Prototypes for global functions in ha_innodb.cc that are called by
+InnoDB C code
+
+Created 5/11/2006 Osku Salerma
+************************************************************************/
+
+#ifndef HA_INNODB_PROTOTYPES_H
+#define HA_INNODB_PROTOTYPES_H
+
+#include "my_dbug.h"
+#include "mysqld_error.h"
+#include "my_compare.h"
+#include "my_sys.h"
+#include "m_string.h"
+#include "debug_sync.h"
+
+#include "trx0types.h"
+#include "m_ctype.h" /* CHARSET_INFO */
+
+// Forward declarations
+class Field;
+struct fts_string_t;
+
+/*********************************************************************//**
+Wrapper around MySQL's copy_and_convert function.
+@return	number of bytes copied to 'to' */
+UNIV_INTERN
+ulint
+innobase_convert_string(
+/*====================*/
+	void*		to,		/*!< out: converted string */
+	ulint		to_length,	/*!< in: number of bytes reserved
+					for the converted string */
+	CHARSET_INFO*	to_cs,		/*!< in: character set to convert to */
+	const void*	from,		/*!< in: string to convert */
+	ulint		from_length,	/*!< in: number of bytes to convert */
+	CHARSET_INFO*	from_cs,	/*!< in: character set to convert
+					from */
+	uint*		errors);	/*!< out: number of errors encountered
+					during the conversion */
+
+/*******************************************************************//**
+Formats the raw data in "data" (in InnoDB on-disk format) that is of
+type DATA_(CHAR|VARCHAR|MYSQL|VARMYSQL) using "charset_coll" and writes
+the result to "buf". The result is converted to "system_charset_info".
+Not more than "buf_size" bytes are written to "buf".
+The result is always NUL-terminated (provided buf_size > 0) and the
+number of bytes that were written to "buf" is returned (including the
+terminating NUL).
+@return	number of bytes that were written */
+UNIV_INTERN
+ulint
+innobase_raw_format(
+/*================*/
+	const char*	data,		/*!< in: raw data */
+	ulint		data_len,	/*!< in: raw data length
+					in bytes */
+	ulint		charset_coll,	/*!< in: charset collation */
+	char*		buf,		/*!< out: output buffer */
+	ulint		buf_size);	/*!< in: output buffer size
+					in bytes */
+
+/*****************************************************************//**
+Invalidates the MySQL query cache for the table. */
+UNIV_INTERN
+void
+innobase_invalidate_query_cache(
+/*============================*/
+	trx_t*		trx,		/*!< in: transaction which
+					modifies the table */
+	const char*	full_name,	/*!< in: concatenation of
+					database name, null char NUL,
+					table name, null char NUL;
+					NOTE that in Windows this is
+					always in LOWER CASE! */
+	ulint		full_name_len);	/*!< in: full name length where
+					also the null chars count */
+
+/*****************************************************************//**
+Convert a table or index name to the MySQL system_charset_info (UTF-8)
+and quote it if needed.
+@return	pointer to the end of buf */
+UNIV_INTERN
+char*
+innobase_convert_name(
+/*==================*/
+	char*		buf,	/*!< out: buffer for converted identifier */
+	ulint		buflen,	/*!< in: length of buf, in bytes */
+	const char*	id,	/*!< in: identifier to convert */
+	ulint		idlen,	/*!< in: length of id, in bytes */
+	THD*		thd,	/*!< in: MySQL connection thread, or NULL */
+	ibool		table_id);/*!< in: TRUE=id is a table or database name;
+				FALSE=id is an index name */
+
+/******************************************************************//**
+Returns true if the thread is the replication thread on the slave
+server. Used in srv_conc_enter_innodb() to determine if the thread
+should be allowed to enter InnoDB - the replication thread is treated
+differently than other threads. Also used in
+srv_conc_force_exit_innodb().
+@return	true if thd is the replication thread */
+UNIV_INTERN
+ibool
+thd_is_replication_slave_thread(
+/*============================*/
+	THD*	thd);	/*!< in: thread handle */
+
+/******************************************************************//**
+Gets information on the durability property requested by thread.
+Used when writing either a prepare or commit record to the log
+buffer.
+@return the durability property. */
+UNIV_INTERN
+enum durability_properties
+thd_requested_durability(
+/*=====================*/
+	const THD* thd)	/*!< in: thread handle */
+	__attribute__((nonnull, warn_unused_result));
+
+/******************************************************************//**
+Returns true if the transaction this thread is processing has edited
+non-transactional tables. Used by the deadlock detector when deciding
+which transaction to rollback in case of a deadlock - we try to avoid
+rolling back transactions that have edited non-transactional tables.
+@return	true if non-transactional tables have been edited */
+UNIV_INTERN
+ibool
+thd_has_edited_nontrans_tables(
+/*===========================*/
+	THD*	thd);	/*!< in: thread handle */
+
+/*************************************************************//**
+Prints info of a THD object (== user session thread) to the given file. */
+UNIV_INTERN
+void
+innobase_mysql_print_thd(
+/*=====================*/
+	FILE*	f,		/*!< in: output stream */
+	THD*	thd,		/*!< in: pointer to a MySQL THD object */
+	uint	max_query_len);	/*!< in: max query length to print, or 0 to
+				   use the default max length */
+
+/*************************************************************//**
+InnoDB uses this function to compare two data fields for which the data type
+is such that we must use MySQL code to compare them.
+@return	1, 0, -1, if a is greater, equal, less than b, respectively */
+UNIV_INTERN
+int
+innobase_mysql_cmp(
+/*===============*/
+	int		mysql_type,	/*!< in: MySQL type */
+	uint		charset_number,	/*!< in: number of the charset */
+	const unsigned char* a,		/*!< in: data field */
+	unsigned int	a_length,	/*!< in: data field length,
+					not UNIV_SQL_NULL */
+	const unsigned char* b,		/*!< in: data field */
+	unsigned int	b_length)	/*!< in: data field length,
+					not UNIV_SQL_NULL */
+	__attribute__((nonnull, warn_unused_result));
+/**************************************************************//**
+Converts a MySQL type to an InnoDB type. Note that this function returns
+the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
+VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'.
+@return	DATA_BINARY, DATA_VARCHAR, ... */
+UNIV_INTERN
+ulint
+get_innobase_type_from_mysql_type(
+/*==============================*/
+	ulint*		unsigned_flag,	/*!< out: DATA_UNSIGNED if an
+					'unsigned type';
+					at least ENUM and SET,
+					and unsigned integer
+					types are 'unsigned types' */
+	const void*	field)		/*!< in: MySQL Field */
+	__attribute__((nonnull));
+
+/******************************************************************//**
+Get the variable length bounds of the given character set. */
+UNIV_INTERN
+void
+innobase_get_cset_width(
+/*====================*/
+	ulint	cset,		/*!< in: MySQL charset-collation code */
+	ulint*	mbminlen,	/*!< out: minimum length of a char (in bytes) */
+	ulint*	mbmaxlen);	/*!< out: maximum length of a char (in bytes) */
+
+/******************************************************************//**
+Compares NUL-terminated UTF-8 strings case insensitively.
+@return	0 if a=b, <0 if a<b, >0 if a>b */
+UNIV_INTERN
+int
+innobase_strcasecmp(
+/*================*/
+	const char*	a,	/*!< in: first string to compare */
+	const char*	b);	/*!< in: second string to compare */
+
+/******************************************************************//**
+Compares NUL-terminated UTF-8 strings case insensitively. The
+second string contains wildcards.
+@return 0 if a match is found, 1 if not */
+UNIV_INTERN
+int
+innobase_wildcasecmp(
+/*=================*/
+	const char*	a,	/*!< in: string to compare */
+	const char*	b);	/*!< in: wildcard string to compare */
+
+/******************************************************************//**
+Strip dir name from a full path name and return only its file name.
+@return file name or "null" if no file name */
+UNIV_INTERN
+const char*
+innobase_basename(
+/*==============*/
+	const char*	path_name);	/*!< in: full path name */
+
+/******************************************************************//**
+Returns true if the thread is executing a SELECT statement.
+@return	true if thd is executing SELECT */
+UNIV_INTERN
+ibool
+thd_is_select(
+/*==========*/
+	const THD*	thd);	/*!< in: thread handle */
+
+/******************************************************************//**
+Converts an identifier to a table name. */
+UNIV_INTERN
+void
+innobase_convert_from_table_id(
+/*===========================*/
+	struct charset_info_st*	cs,	/*!< in: the 'from' character set */
+	char*			to,	/*!< out: converted identifier */
+	const char*		from,	/*!< in: identifier to convert */
+	ulint			len);	/*!< in: length of 'to', in bytes; should
+					be at least 5 * strlen(from) + 1 */
+/******************************************************************//**
+Converts an identifier to UTF-8. */
+UNIV_INTERN
+void
+innobase_convert_from_id(
+/*=====================*/
+	struct charset_info_st*	cs,	/*!< in: the 'from' character set */
+	char*			to,	/*!< out: converted identifier */
+	const char*		from,	/*!< in: identifier to convert */
+	ulint			len);	/*!< in: length of 'to', in bytes;
+					should be at least 3 * strlen(from) + 1 */
+/******************************************************************//**
+Makes all characters in a NUL-terminated UTF-8 string lower case. */
+UNIV_INTERN
+void
+innobase_casedn_str(
+/*================*/
+	char*	a);	/*!< in/out: string to put in lower case */
+
+/**********************************************************************//**
+Determines the connection character set.
+@return	connection character set */
+UNIV_INTERN
+struct charset_info_st*
+innobase_get_charset(
+/*=================*/
+	THD*	thd);	/*!< in: MySQL thread handle */
+/**********************************************************************//**
+Determines the current SQL statement.
+@return	SQL statement string */
+UNIV_INTERN
+const char*
+innobase_get_stmt(
+/*==============*/
+	THD*	thd,		/*!< in: MySQL thread handle */
+	size_t*	length)		/*!< out: length of the SQL statement */
+	__attribute__((nonnull));
+/******************************************************************//**
+This function is used to find the storage length in bytes of the first n
+characters for prefix indexes using a multibyte character set. The function
+finds charset information and returns length of prefix_len characters in the
+index field in bytes.
+@return	number of bytes occupied by the first n characters */
+UNIV_INTERN
+ulint
+innobase_get_at_most_n_mbchars(
+/*===========================*/
+	ulint charset_id,	/*!< in: character set id */
+	ulint prefix_len,	/*!< in: prefix length in bytes of the index
+				(this has to be divided by mbmaxlen to get the
+				number of CHARACTERS n in the prefix) */
+	ulint data_len,		/*!< in: length of the string in bytes */
+	const char* str);	/*!< in: character string */
+
+/*************************************************************//**
+InnoDB index push-down condition check
+@return ICP_NO_MATCH, ICP_MATCH, or ICP_OUT_OF_RANGE */
+UNIV_INTERN
+enum icp_result
+innobase_index_cond(
+/*================*/
+	void*	file)	/*!< in/out: pointer to ha_innobase */
+	__attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Returns true if the thread supports XA,
+global value of innodb_supports_xa if thd is NULL.
+@return	true if thd supports XA */
+UNIV_INTERN
+ibool
+thd_supports_xa(
+/*============*/
+	THD*	thd);	/*!< in: thread handle, or NULL to query
+			the global innodb_supports_xa */
+
+/******************************************************************//**
+Returns the lock wait timeout for the current connection.
+@return	the lock wait timeout, in seconds */
+UNIV_INTERN
+ulong
+thd_lock_wait_timeout(
+/*==================*/
+	THD*	thd);	/*!< in: thread handle, or NULL to query
+			the global innodb_lock_wait_timeout */
+/******************************************************************//**
+Add up the time waited for the lock for the current query. */
+UNIV_INTERN
+void
+thd_set_lock_wait_time(
+/*===================*/
+	THD*	thd,	/*!< in/out: thread handle */
+	ulint	value);	/*!< in: time waited for the lock */
+
+/**********************************************************************//**
+Get the current setting of the table_cache_size global parameter. We do
+a dirty read because for one there is no synchronization object and
+secondly there is little harm in doing so even if we get a torn read.
+@return	value of table_cache_size */
+UNIV_INTERN
+ulint
+innobase_get_table_cache_size(void);
+/*===============================*/
+
+/**********************************************************************//**
+Get the current setting of the lower_case_table_names global parameter from
+mysqld.cc. We do a dirty read because for one there is no synchronization
+object and secondly there is little harm in doing so even if we get a torn
+read.
+@return	value of lower_case_table_names */
+UNIV_INTERN
+ulint
+innobase_get_lower_case_table_names(void);
+/*=====================================*/
+
+/*****************************************************************//**
+Frees a possible InnoDB trx object associated with the current THD.
+@return 0 or error number */
+UNIV_INTERN
+int
+innobase_close_thd(
+/*===============*/
+	THD*	thd);		/*!< in: MySQL thread handle for
+				which to close the connection */
+/*************************************************************//**
+Get the next token from the given string and store it in *token. */
+UNIV_INTERN
+ulint
+innobase_mysql_fts_get_token(
+/*=========================*/
+	CHARSET_INFO*	charset,	/*!< in: Character set */
+	const byte*	start,		/*!< in: start of text */
+	const byte*	end,		/*!< in: one character past end of
+					text */
+	fts_string_t*	token,		/*!< out: token's text */
+	ulint*		offset);	/*!< out: offset to token,
+					measured as characters from
+					'start' */
+
+/******************************************************************//**
+Compares two character strings case insensitively according to their charset. */
+UNIV_INTERN
+int
+innobase_fts_text_case_cmp(
+/*=======================*/
+	const void*	cs,		/*!< in: Character set */
+	const void*	p1,		/*!< in: key */
+	const void*	p2);		/*!< in: node */
+
+/****************************************************************//**
+Get FTS field charset info from the field's prtype
+@return charset info */
+UNIV_INTERN
+CHARSET_INFO*
+innobase_get_fts_charset(
+/*=====================*/
+	int		mysql_type,	/*!< in: MySQL type */
+	uint		charset_number);/*!< in: number of the charset */
+/******************************************************************//**
+Returns true if transaction should be flagged as read-only.
+@return	true if the thd is marked as read-only */
+UNIV_INTERN
+ibool
+thd_trx_is_read_only(
+/*=================*/
+	THD*	thd);	/*!< in/out: thread handle */
+
+/******************************************************************//**
+Check if the transaction is an auto-commit transaction. TRUE also
+implies that it is a SELECT (read-only) transaction.
+@return	true if the transaction is an auto commit read-only transaction. */
+UNIV_INTERN
+ibool
+thd_trx_is_auto_commit(
+/*===================*/
+	THD*	thd);	/*!< in: thread handle, or NULL */
+
+/*****************************************************************//**
+A wrapper function of innobase_convert_name(), convert a table or
+index name to the MySQL system_charset_info (UTF-8) and quote it if needed.
+The result is written to buf; nothing is returned. */
+UNIV_INTERN
+void
+innobase_format_name(
+/*==================*/
+	char*		buf,		/*!< out: buffer for converted
+					identifier */
+	ulint		buflen,		/*!< in: length of buf, in bytes */
+	const char*	name,		/*!< in: index or table name
+					to format */
+	ibool		is_index_name)	/*!< in: index name */
+	__attribute__((nonnull));
+
+/** Corresponds to Sql_condition::enum_warning_level. */
+enum ib_log_level_t {
+	IB_LOG_LEVEL_INFO,	/*!< informational message */
+	IB_LOG_LEVEL_WARN,	/*!< warning */
+	IB_LOG_LEVEL_ERROR,	/*!< error */
+	IB_LOG_LEVEL_FATAL	/*!< fatal error */
+};
+
+/******************************************************************//**
+Use this when the args are first converted to a formatted string and then
+passed to the format string from errmsg-utf8.txt. The error message format
+must be: "Some string ... %s".
+
+Push a warning message to the client, it is a wrapper around:
+
+void push_warning_printf(
+	THD *thd, Sql_condition::enum_warning_level level,
+	uint code, const char *format, ...);
+*/
+UNIV_INTERN
+void
+ib_errf(
+/*====*/
+	THD*		thd,		/*!< in/out: session */
+	ib_log_level_t	level,		/*!< in: warning level */
+	ib_uint32_t	code,		/*!< MySQL error code */
+	const char*	format,		/*!< printf format */
+	...)				/*!< Args */
+	__attribute__((format(printf, 4, 5)));
+
+/******************************************************************//**
+Use this when the args are passed to the format string from
+errmsg-utf8.txt directly as is.
+
+Push a warning message to the client, it is a wrapper around:
+
+void push_warning_printf(
+	THD *thd, Sql_condition::enum_warning_level level,
+	uint code, const char *format, ...);
+*/
+UNIV_INTERN
+void
+ib_senderrf(
+/*========*/
+	THD*		thd,		/*!< in/out: session */
+	ib_log_level_t	level,		/*!< in: warning level */
+	ib_uint32_t	code,		/*!< MySQL error code */
+	...);				/*!< Args */
+
+/******************************************************************//**
+Write a message to the MySQL log, prefixed with "InnoDB: ".
+Wrapper around sql_print_information() */
+UNIV_INTERN
+void
+ib_logf(
+/*====*/
+	ib_log_level_t	level,		/*!< in: warning level */
+	const char*	format,		/*!< printf format */
+	...)				/*!< Args */
+	__attribute__((format(printf, 2, 3)));
+
+/******************************************************************//**
+Returns the NUL terminated value of glob_hostname.
+@return	pointer to glob_hostname. */
+UNIV_INTERN
+const char*
+server_get_hostname();
+/*=================*/
+
+/******************************************************************//**
+Get the error message format string.
+@return the format string or 0 if not found. */
+UNIV_INTERN
+const char*
+innobase_get_err_msg(
+/*=================*/
+	int	error_code);	/*!< in: MySQL error code */
+
+/*********************************************************************//**
+Compute the next autoinc value.
+
+For MySQL replication the autoincrement values can be partitioned among
+the nodes. The offset is the start or origin of the autoincrement value
+for a particular node. For n nodes the increment will be n and the offset
+will be in the interval [1, n]. The formula tries to allocate the next
+value for a particular node.
+
+Note: This function is also called with increment set to the number of
+values we want to reserve for multi-value inserts e.g.,
+
+	INSERT INTO T VALUES(), (), ();
+
+innobase_next_autoinc() will be called with increment set to 3 where
+autoinc_lock_mode != TRADITIONAL because we want to reserve 3 values for
+the multi-value INSERT above.
+@return	the next value */
+UNIV_INTERN
+ulonglong
+innobase_next_autoinc(
+/*==================*/
+	ulonglong	current,	/*!< in: Current value */
+	ulonglong	need,		/*!< in: count of values needed */
+	ulonglong	step,		/*!< in: AUTOINC increment step */
+	ulonglong	offset,		/*!< in: AUTOINC offset */
+	ulonglong	max_value)	/*!< in: max value for type */
+	__attribute__((pure, warn_unused_result));
+
+/********************************************************************//**
+Get the upper limit of the MySQL integral and floating-point type.
+@return maximum allowed value for the field */
+UNIV_INTERN
+ulonglong
+innobase_get_int_col_max_value(
+/*===========================*/
+	const Field*	field)	/*!< in: MySQL field */
+	__attribute__((nonnull, pure, warn_unused_result));
+
+/**********************************************************************
+Check if the length of the identifier exceeds the maximum allowed.
+The input to this function is an identifier in charset my_charset_filename.
+return true when length of identifier is too long. */
+UNIV_INTERN
+my_bool
+innobase_check_identifier_length(
+/*=============================*/
+	const char*	id);	/* in: identifier to check.  it must belong
+				to charset my_charset_filename */
+
+/**********************************************************************
+Converts an identifier from my_charset_filename to UTF-8 charset. */
+uint
+innobase_convert_to_system_charset(
+/*===============================*/
+	char*           to,		/* out: converted identifier */
+	const char*     from,		/* in: identifier to convert */
+	ulint           len,		/* in: length of 'to', in bytes */
+	uint*		errors);	/* out: error return */
+
+/**********************************************************************
+Converts an identifier from UTF-8 charset to my_charset_filename. */
+uint
+innobase_convert_to_filename_charset(
+/*=================================*/
+	char*           to,     /* out: converted identifier */
+	const char*     from,   /* in: identifier to convert */
+	ulint           len);   /* in: length of 'to', in bytes */
+
+
+#endif /* HA_INNODB_PROTOTYPES_H */
diff --git a/storage/innobase/include/handler0alter.h b/storage/innobase/include/handler0alter.h
new file mode 100644
index 00000000000..66b963ae39a
--- /dev/null
+++ b/storage/innobase/include/handler0alter.h
@@ -0,0 +1,114 @@
+/*****************************************************************************
+
+Copyright (c) 2005, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/handler0alter.h
+Smart ALTER TABLE
+*******************************************************/
+
+/*************************************************************//**
+Copies an InnoDB record to table->record[0]. */
+UNIV_INTERN
+void
+innobase_rec_to_mysql(
+/*==================*/
+	struct TABLE*		table,	/*!< in/out: MySQL table */
+	const rec_t*		rec,	/*!< in: record */
+	const dict_index_t*	index,	/*!< in: index */
+	const ulint*		offsets)/*!< in: rec_get_offsets(
+					rec, index, ...) */
+	__attribute__((nonnull));
+
+/*************************************************************//**
+Copies an InnoDB index entry to table->record[0]. */
+UNIV_INTERN
+void
+innobase_fields_to_mysql(
+/*=====================*/
+	struct TABLE*		table,	/*!< in/out: MySQL table */
+	const dict_index_t*	index,	/*!< in: InnoDB index */
+	const dfield_t*		fields)	/*!< in: InnoDB index fields */
+	__attribute__((nonnull));
+
+/*************************************************************//**
+Copies an InnoDB row to table->record[0]. */
+UNIV_INTERN
+void
+innobase_row_to_mysql(
+/*==================*/
+	struct TABLE*		table,	/*!< in/out: MySQL table */
+	const dict_table_t*	itab,	/*!< in: InnoDB table */
+	const dtuple_t*		row)	/*!< in: InnoDB row */
+	__attribute__((nonnull));
+
+/*************************************************************//**
+Resets table->record[0]. */
+UNIV_INTERN
+void
+innobase_rec_reset(
+/*===============*/
+	struct TABLE*		table)		/*!< in/out: MySQL table */
+	__attribute__((nonnull));
+
+/** Generate the next autoinc based on a snapshot of the session
+auto_increment_increment and auto_increment_offset variables. */
+struct ib_sequence_t {
+
+	/** Constructor.
+	@param thd - the session
+	@param start_value - the lower bound
+	@param max_value - the upper bound (inclusive) */
+	ib_sequence_t(THD* thd, ulonglong start_value, ulonglong max_value);
+
+	/**
+	Postfix increment
+	@return the value to insert */
+	ulonglong operator++(int) UNIV_NOTHROW;
+
+	/** Check if the autoinc "sequence" is exhausted.
+	@return true if the sequence is exhausted */
+	bool eof() const UNIV_NOTHROW
+	{
+		return(m_eof);
+	}
+
+	/** Peek at m_next_value without modifying the sequence.
+	@return the next value in the sequence */
+	ulonglong last() const UNIV_NOTHROW
+	{
+		ut_ad(m_next_value > 0);
+
+		return(m_next_value);
+	}
+
+	/** Maximum column value if adding an AUTOINC column else 0. Once
+	we reach the end of the sequence it will be set to ~0. */
+	const ulonglong	m_max_value;
+
+	/** Value of auto_increment_increment */
+	ulong		m_increment;
+
+	/** Value of auto_increment_offset */
+	ulong		m_offset;
+
+	/** Next value in the sequence */
+	ulonglong	m_next_value;
+
+	/** true if no more values left in the sequence */
+	bool		m_eof;
+};
diff --git a/storage/innobase/include/hash0hash.h b/storage/innobase/include/hash0hash.h
new file mode 100644
index 00000000000..6f9a628df5d
--- /dev/null
+++ b/storage/innobase/include/hash0hash.h
@@ -0,0 +1,575 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/hash0hash.h
+The simple hash table utility
+
+Created 5/20/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef hash0hash_h
+#define hash0hash_h
+
+#include "univ.i"
+#include "mem0mem.h"
+#ifndef UNIV_HOTBACKUP
+# include "sync0sync.h"
+# include "sync0rw.h"
+#endif /* !UNIV_HOTBACKUP */
+
+struct hash_table_t;
+struct hash_cell_t;
+
+typedef void*	hash_node_t;
+
+/* Fix Bug #13859: symbol collision between imap/mysql */
+#define hash_create hash0_create
+
+/* Different types of hash_table based on the synchronization
+method used for it. */
+enum hash_table_sync_t {
+	HASH_TABLE_SYNC_NONE = 0,	/*!< Don't use any internal
+					synchronization objects for
+					this hash_table. */
+	HASH_TABLE_SYNC_MUTEX,		/*!< Use mutexes to control
+					access to this hash_table. */
+	HASH_TABLE_SYNC_RW_LOCK		/*!< Use rw_locks to control
+					access to this hash_table. */
+};
+
+/*************************************************************//**
+Creates a hash table with >= n array cells. The actual number
+of cells is chosen to be a prime number slightly bigger than n.
+@return	own: created table */
+UNIV_INTERN
+hash_table_t*
+hash_create(
+/*========*/
+	ulint	n);	/*!< in: number of array cells */
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
+Creates a sync object array to protect a hash table.
+::sync_obj can be mutexes or rw_locks depending on the type of
+hash table. */
+UNIV_INTERN
+void
+hash_create_sync_obj_func(
+/*======================*/
+	hash_table_t*		table,	/*!< in: hash table */
+	enum hash_table_sync_t	type,	/*!< in: HASH_TABLE_SYNC_MUTEX
+					or HASH_TABLE_SYNC_RW_LOCK */
+#ifdef UNIV_SYNC_DEBUG
+	ulint			sync_level,/*!< in: latching order level
+					of the mutexes: used in the
+					debug version */
+#endif /* UNIV_SYNC_DEBUG */
+	ulint			n_sync_obj);/*!< in: number of sync objects,
+					must be a power of 2 */
+#ifdef UNIV_SYNC_DEBUG /* wrapper: level is forwarded as sync_level */
+# define hash_create_sync_obj(t, s, n, level)			\
+			hash_create_sync_obj_func(t, s, level, n)
+#else /* UNIV_SYNC_DEBUG: the level argument is dropped by the wrapper */
+# define hash_create_sync_obj(t, s, n, level)			\
+			hash_create_sync_obj_func(t, s, n)
+#endif /* UNIV_SYNC_DEBUG */
+#endif /* !UNIV_HOTBACKUP */
+
+/*************************************************************//**
+Frees a hash table. */
+UNIV_INTERN
+void
+hash_table_free(
+/*============*/
+	hash_table_t*	table);	/*!< in, own: hash table */
+/**************************************************************//**
+Calculates the hash value from a folded value.
+@return	hashed value */
+UNIV_INLINE
+ulint
+hash_calc_hash(
+/*===========*/
+	ulint		fold,	/*!< in: folded value */
+	hash_table_t*	table);	/*!< in: hash table */
+#ifndef UNIV_HOTBACKUP
+/********************************************************************//**
+Assert that the mutex for FOLD is held, if TABLE is protected by mutexes */
+# define HASH_ASSERT_OWN(TABLE, FOLD)				\
+	ut_ad((TABLE)->type != HASH_TABLE_SYNC_MUTEX		\
+	      || (mutex_own(hash_get_mutex((TABLE), FOLD))));
+#else /* !UNIV_HOTBACKUP */
+# define HASH_ASSERT_OWN(TABLE, FOLD) /* expands to nothing; the variant above carries its own ';' */
+#endif /* !UNIV_HOTBACKUP */
+
+/*******************************************************************//**
+Inserts the struct DATA at the end of the hash chain of FOLD in TABLE.
+NAME is the next-pointer field of TYPE; DATA is expanded more than once. */
+#define HASH_INSERT(TYPE, NAME, TABLE, FOLD, DATA)\
+do {\
+	hash_cell_t*	cell3333;\
+	TYPE*		struct3333;\
+\
+	HASH_ASSERT_OWN(TABLE, FOLD)\
+	/* DATA becomes the chain tail, so its next pointer is cleared */\
+	(DATA)->NAME = NULL;\
+\
+	cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\
+\
+	if (cell3333->node == NULL) {\
+		cell3333->node = (DATA);\
+	} else {\
+		struct3333 = (TYPE*) cell3333->node;\
+		/* walk to the last node of the existing chain */\
+		while (struct3333->NAME != NULL) {\
+\
+			struct3333 = (TYPE*) struct3333->NAME;\
+		}\
+\
+		struct3333->NAME = (DATA);\
+	}\
+} while (0)
+
+#ifdef UNIV_HASH_DEBUG /* (void*) -1 marks the next pointer of a deleted node */
+# define HASH_ASSERT_VALID(DATA) ut_a((void*) (DATA) != (void*) -1)
+# define HASH_INVALIDATE(DATA, NAME) (*(void**) (&(DATA)->NAME) = (void*) -1)
+#else /* !UNIV_HASH_DEBUG: both checks compile to empty statements */
+# define HASH_ASSERT_VALID(DATA) do {} while (0)
+# define HASH_INVALIDATE(DATA, NAME) do {} while (0)
+#endif
+
+/*******************************************************************//**
+Unlinks the struct DATA from the hash chain of FOLD in TABLE. DATA must be
+in that chain. NAME is the next-pointer field of TYPE. */
+#define HASH_DELETE(TYPE, NAME, TABLE, FOLD, DATA)\
+do {\
+	hash_cell_t*	cell3333;\
+	TYPE*		struct3333;\
+\
+	HASH_ASSERT_OWN(TABLE, FOLD)\
+\
+	cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\
+\
+	if (cell3333->node == (DATA)) {\
+		HASH_ASSERT_VALID((DATA)->NAME);\
+		cell3333->node = (DATA)->NAME;\
+	} else {\
+		struct3333 = (TYPE*) cell3333->node;\
+		/* find the predecessor of DATA; ut_a fires at chain end */\
+		while (struct3333->NAME != (DATA)) {\
+\
+			struct3333 = (TYPE*) struct3333->NAME;\
+			ut_a(struct3333);\
+		}\
+\
+		struct3333->NAME = (DATA)->NAME;\
+	}\
+	HASH_INVALIDATE((DATA), NAME);\
+} while (0)
+
+/*******************************************************************//**
+Gets the first struct in the hash chain of cell HASH_VAL, NULL if none. */
+
+#define HASH_GET_FIRST(TABLE, HASH_VAL)\
+	(hash_get_nth_cell(TABLE, HASH_VAL)->node)
+
+/*******************************************************************//**
+Gets the struct that DATA links to through its field NAME, NULL if none. */
+
+#define HASH_GET_NEXT(NAME, DATA)	((DATA)->NAME)
+
+/********************************************************************//**
+Searches the chain of FOLD: DATA = first node passing TEST, else NULL. */
+#define HASH_SEARCH(NAME, TABLE, FOLD, TYPE, DATA, ASSERTION, TEST)\
+{\
+	/* note: this expands to a bare block, not do {} while (0) */\
+	HASH_ASSERT_OWN(TABLE, FOLD)\
+	/* TYPE is used as the cast applied to the chain pointers */\
+	(DATA) = (TYPE) HASH_GET_FIRST(TABLE, hash_calc_hash(FOLD, TABLE));\
+	HASH_ASSERT_VALID(DATA);\
+\
+	while ((DATA) != NULL) {\
+		ASSERTION;\
+		if (TEST) {\
+			break;\
+		} else {\
+			HASH_ASSERT_VALID(HASH_GET_NEXT(NAME, DATA));\
+			(DATA) = (TYPE) HASH_GET_NEXT(NAME, DATA);\
+		}\
+	}\
+}
+
+/********************************************************************//**
+Looks for an item in all hash buckets, from the last cell to the first. */
+#define HASH_SEARCH_ALL(NAME, TABLE, TYPE, DATA, ASSERTION, TEST)	\
+do {									\
+	ulint	i3333;							\
+	/* i3333 counts down from n_cells - 1 to 0 */			\
+	for (i3333 = (TABLE)->n_cells; i3333--; ) {			\
+		(DATA) = (TYPE) HASH_GET_FIRST(TABLE, i3333);		\
+									\
+		while ((DATA) != NULL) {				\
+			HASH_ASSERT_VALID(DATA);			\
+			ASSERTION;					\
+									\
+			if (TEST) {					\
+				break;					\
+			}						\
+									\
+			(DATA) = (TYPE) HASH_GET_NEXT(NAME, DATA);	\
+		}							\
+		/* a hit leaves DATA non-NULL: stop scanning cells */	\
+		if ((DATA) != NULL) {					\
+			break;						\
+		}							\
+	}								\
+} while (0)
+
+/************************************************************//**
+Gets the nth cell in a hash table.
+@return	pointer to cell */
+UNIV_INLINE
+hash_cell_t*
+hash_get_nth_cell(
+/*==============*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		n);	/*!< in: cell index */
+
+/*************************************************************//**
+Clears a hash table so that all the cells become empty. */
+UNIV_INLINE
+void
+hash_table_clear(
+/*=============*/
+	hash_table_t*	table);	/*!< in/out: hash table */
+
+/*************************************************************//**
+Returns the number of cells in a hash table.
+@return	number of cells */
+UNIV_INLINE
+ulint
+hash_get_n_cells(
+/*=============*/
+	hash_table_t*	table);	/*!< in: table */
+/*******************************************************************//**
+Deletes a struct which is stored in the heap of the hash table, and compacts
+the heap. The fold value must be stored in the struct NODE in a field named
+'fold'. NODE is expanded several times: it must have no side effects. */
+
+#define HASH_DELETE_AND_COMPACT(TYPE, NAME, TABLE, NODE)\
+do {\
+	TYPE*		node111;\
+	TYPE*		top_node111;\
+	hash_cell_t*	cell111;\
+	ulint		fold111;\
+	/* read the fold first: *(NODE) may be overwritten further down */\
+	fold111 = (NODE)->fold;\
+\
+	HASH_DELETE(TYPE, NAME, TABLE, fold111, (NODE));\
+\
+	top_node111 = (TYPE*) mem_heap_get_top(\
+				hash_get_heap(TABLE, fold111),\
+							sizeof(TYPE));\
+\
+	/* If the node to remove is not the top node in the heap, compact the\
+	heap of nodes by moving the top node in the place of NODE. */\
+\
+	if ((NODE) != top_node111) {\
+\
+		/* Copy the top node in place of NODE */\
+\
+		*(NODE) = *top_node111;\
+\
+		cell111 = hash_get_nth_cell(TABLE,\
+				hash_calc_hash(top_node111->fold, TABLE));\
+\
+		/* Look for the pointer to the top node, to update it */\
+\
+		if (cell111->node == top_node111) {\
+			/* The top node is the first in the chain */\
+\
+			cell111->node = (NODE);\
+		} else {\
+			/* We have to look for the predecessor of the top\
+			node */\
+			node111 = static_cast<TYPE*>(cell111->node);\
+\
+			while (top_node111 != HASH_GET_NEXT(NAME, node111)) {\
+\
+				node111 = static_cast<TYPE*>(\
+					HASH_GET_NEXT(NAME, node111));\
+			}\
+\
+			/* Now we have the predecessor node */\
+\
+			node111->NAME = (NODE);\
+		}\
+	}\
+\
+	/* Free the space occupied by the top node */\
+\
+	mem_heap_free_top(hash_get_heap(TABLE, fold111), sizeof(TYPE));\
+} while (0)
+
+#ifndef UNIV_HOTBACKUP
+/****************************************************************//**
+Move all hash table entries from OLD_TABLE to NEW_TABLE. FOLD_FUNC(node)
+must give the fold of a node. The cells of OLD_TABLE are not reset here. */
+#define HASH_MIGRATE(OLD_TABLE, NEW_TABLE, NODE_TYPE, PTR_NAME, FOLD_FUNC) \
+do {\
+	ulint		i2222;\
+	ulint		cell_count2222;\
+\
+	cell_count2222 = hash_get_n_cells(OLD_TABLE);\
+\
+	for (i2222 = 0; i2222 < cell_count2222; i2222++) {\
+		NODE_TYPE*	node2222 = (NODE_TYPE*)\
+			HASH_GET_FIRST((OLD_TABLE), i2222);\
+		while (node2222) {\
+			NODE_TYPE*	next2222 = (NODE_TYPE*)\
+				node2222->PTR_NAME;\
+			ulint		fold2222 = FOLD_FUNC(node2222);\
+			HASH_INSERT(NODE_TYPE, PTR_NAME, (NEW_TABLE),\
+				fold2222, node2222);\
+			/* HASH_INSERT cleared PTR_NAME: use the saved link */\
+			node2222 = next2222;\
+		}\
+	}\
+} while (0)
+
+/************************************************************//**
+Gets the sync object index for a fold value in a hash table.
+@return	index */
+UNIV_INLINE
+ulint
+hash_get_sync_obj_index(
+/*====================*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold);	/*!< in: fold */
+/************************************************************//**
+Gets the nth heap in a hash table.
+@return	mem heap */
+UNIV_INLINE
+mem_heap_t*
+hash_get_nth_heap(
+/*==============*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		i);	/*!< in: index of the heap */
+/************************************************************//**
+Gets the heap for a fold value in a hash table.
+@return	mem heap */
+UNIV_INLINE
+mem_heap_t*
+hash_get_heap(
+/*==========*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold);	/*!< in: fold */
+/************************************************************//**
+Gets the nth mutex in a hash table.
+@return	mutex */
+UNIV_INLINE
+ib_mutex_t*
+hash_get_nth_mutex(
+/*===============*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		i);	/*!< in: index of the mutex */
+/************************************************************//**
+Gets the nth rw_lock in a hash table.
+@return	rw_lock */
+UNIV_INLINE
+rw_lock_t*
+hash_get_nth_lock(
+/*==============*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		i);	/*!< in: index of the rw_lock */
+/************************************************************//**
+Gets the mutex for a fold value in a hash table.
+@return	mutex */
+UNIV_INLINE
+ib_mutex_t*
+hash_get_mutex(
+/*===========*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold);	/*!< in: fold */
+/************************************************************//**
+Gets the rw_lock for a fold value in a hash table.
+@return	rw_lock */
+UNIV_INLINE
+rw_lock_t*
+hash_get_lock(
+/*==========*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold);	/*!< in: fold */
+/************************************************************//**
+Reserves the mutex for a fold value in a hash table. */
+UNIV_INTERN
+void
+hash_mutex_enter(
+/*=============*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold);	/*!< in: fold */
+/************************************************************//**
+Releases the mutex for a fold value in a hash table. */
+UNIV_INTERN
+void
+hash_mutex_exit(
+/*============*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold);	/*!< in: fold */
+/************************************************************//**
+Reserves all the mutexes of a hash table, in an ascending order. */
+UNIV_INTERN
+void
+hash_mutex_enter_all(
+/*=================*/
+	hash_table_t*	table);	/*!< in: hash table */
+/************************************************************//**
+Releases all the mutexes of a hash table. */
+UNIV_INTERN
+void
+hash_mutex_exit_all(
+/*================*/
+	hash_table_t*	table);	/*!< in: hash table */
+/************************************************************//**
+Releases all but the passed in mutex of a hash table. */
+UNIV_INTERN
+void
+hash_mutex_exit_all_but(
+/*====================*/
+	hash_table_t*	table,		/*!< in: hash table */
+	ib_mutex_t*	keep_mutex);	/*!< in: mutex to keep */
+/************************************************************//**
+s-lock a lock for a fold value in a hash table. */
+UNIV_INTERN
+void
+hash_lock_s(
+/*========*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold);	/*!< in: fold */
+/************************************************************//**
+x-lock a lock for a fold value in a hash table. */
+UNIV_INTERN
+void
+hash_lock_x(
+/*========*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold);	/*!< in: fold */
+/************************************************************//**
+unlock an s-lock for a fold value in a hash table. */
+UNIV_INTERN
+void
+hash_unlock_s(
+/*==========*/
+
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold);	/*!< in: fold */
+/************************************************************//**
+unlock x-lock for a fold value in a hash table. */
+UNIV_INTERN
+void
+hash_unlock_x(
+/*==========*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold);	/*!< in: fold */
+/************************************************************//**
+Reserves all the locks of a hash table, in an ascending order. */
+UNIV_INTERN
+void
+hash_lock_x_all(
+/*============*/
+	hash_table_t*	table);	/*!< in: hash table */
+/************************************************************//**
+Releases all the locks of a hash table, in an ascending order. */
+UNIV_INTERN
+void
+hash_unlock_x_all(
+/*==============*/
+	hash_table_t*	table);	/*!< in: hash table */
+/************************************************************//**
+Releases all but the passed in lock of a hash table. */
+UNIV_INTERN
+void
+hash_unlock_x_all_but(
+/*==================*/
+	hash_table_t*	table,		/*!< in: hash table */
+	rw_lock_t*	keep_lock);	/*!< in: lock to keep */
+
+#else /* !UNIV_HOTBACKUP: below, UNIV_HOTBACKUP is defined and latching calls are no-ops */
+# define hash_get_heap(table, fold)	((table)->heap)
+# define hash_mutex_enter(table, fold)	((void) 0)
+# define hash_mutex_exit(table, fold)	((void) 0)
+# define hash_mutex_enter_all(table)	((void) 0)
+# define hash_mutex_exit_all(table)	((void) 0)
+# define hash_mutex_exit_all_but(t, m)	((void) 0)
+# define hash_lock_s(t, f)		((void) 0)
+# define hash_lock_x(t, f)		((void) 0)
+# define hash_unlock_s(t, f)		((void) 0)
+# define hash_unlock_x(t, f)		((void) 0)
+# define hash_lock_x_all(t)		((void) 0)
+# define hash_unlock_x_all(t)		((void) 0)
+# define hash_unlock_x_all_but(t, l)	((void) 0)
+#endif /* !UNIV_HOTBACKUP */
+
+struct hash_cell_t{	/* one element of the hash_table_t::array cell array */
+	void*	node;	/*!< hash chain node, NULL if none */
+};
+
+/* The hash table structure */
+struct hash_table_t {
+	enum hash_table_sync_t	type;	/*!< type of hash_table. */
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+# ifndef UNIV_HOTBACKUP
+	ibool			adaptive;/* TRUE if this is the hash
+					table of the adaptive hash
+					index */
+# endif /* !UNIV_HOTBACKUP */
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+	ulint			n_cells;/* number of cells in the hash table */
+	hash_cell_t*		array;	/*!< pointer to cell array */
+#ifndef UNIV_HOTBACKUP
+	ulint			n_sync_obj;/* if sync_obj is in use, the
+					number of mutexes or the number
+					of rw_locks in it, depending on
+					the type.
+					Must be a power of 2 */
+	union {
+		ib_mutex_t*	mutexes;/* NULL, or an array of mutexes
+					used to protect segments of the
+					hash table */
+		rw_lock_t*	rw_locks;/* NULL, or an array of rw_locks
+					used to protect segments of the
+					hash table */
+	} sync_obj;
+
+	mem_heap_t**		heaps;	/*!< if this is non-NULL, hash
+					chain nodes for external chaining
+					can be allocated from these memory
+					heaps; there are then n_sync_obj
+					many of these heaps */
+#endif /* !UNIV_HOTBACKUP */
+	mem_heap_t*		heap;	/* if non-NULL, hash_get_heap() returns it */
+#ifdef UNIV_DEBUG
+	ulint			magic_n; /* HASH_TABLE_MAGIC_N; checked with ut_ad() by the accessors */
+# define HASH_TABLE_MAGIC_N	76561114
+#endif /* UNIV_DEBUG */
+};
+
+#ifndef UNIV_NONINL
+#include "hash0hash.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/hash0hash.ic b/storage/innobase/include/hash0hash.ic
new file mode 100644
index 00000000000..254f3f82e5d
--- /dev/null
+++ b/storage/innobase/include/hash0hash.ic
@@ -0,0 +1,225 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/hash0hash.ic
+The simple hash table utility
+
+Created 5/20/1997 Heikki Tuuri
+*******************************************************/
+
+#include "ut0rnd.h"
+
+/************************************************************//**
+Looks up one cell of the cell array of a hash table by its index.
+@return	address of cell number n */
+UNIV_INLINE
+hash_cell_t*
+hash_get_nth_cell(
+/*==============*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		n)	/*!< in: cell index, asserted < n_cells */
+{
+	ut_ad(table);
+	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
+	ut_ad(n < table->n_cells);
+	hash_cell_t*	nth_cell = &table->array[n];
+	return(nth_cell);
+}
+
+/*************************************************************//**
+Empties a hash table by zero-filling its whole cell array. */
+UNIV_INLINE
+void
+hash_table_clear(
+/*=============*/
+	hash_table_t*	table)	/*!< in/out: hash table whose cells are zeroed */
+{
+	ut_ad(table);
+	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
+	hash_cell_t*	cells = table->array;
+	memset(cells, 0x0, table->n_cells * sizeof(*table->array));
+}
+
+/*************************************************************//**
+Reads the size of the cell array of a hash table.
+@return	value of table->n_cells */
+UNIV_INLINE
+ulint
+hash_get_n_cells(
+/*=============*/
+	hash_table_t*	table)	/*!< in: hash table to inspect */
+{
+	ut_ad(table);
+	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
+	return table->n_cells;
+}
+
+/**************************************************************//**
+Turns a folded value into a hash value with ut_hash_ulint() and n_cells.
+@return	hashed value */
+UNIV_INLINE
+ulint
+hash_calc_hash(
+/*===========*/
+	ulint		fold,	/*!< in: folded value to hash */
+	hash_table_t*	table)	/*!< in: hash table supplying n_cells */
+{
+	ut_ad(table);
+	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
+	return ut_hash_ulint(fold, table->n_cells);
+}
+
+#ifndef UNIV_HOTBACKUP
+/************************************************************//**
+Maps a fold value to the slot of the sync object that covers it.
+@return	hash value of fold, reduced by n_sync_obj with ut_2pow_remainder() */
+UNIV_INLINE
+ulint
+hash_get_sync_obj_index(
+/*====================*/
+	hash_table_t*	table,	/*!< in: hash table, type must not be NONE */
+	ulint		fold)	/*!< in: fold */
+{
+	ut_ad(table);
+	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
+	ut_ad(table->type != HASH_TABLE_SYNC_NONE);
+	ut_ad(ut_is_2pow(table->n_sync_obj));
+	const ulint	hash_val = hash_calc_hash(fold, table);
+	return(ut_2pow_remainder(hash_val, table->n_sync_obj));
+}
+
+/************************************************************//**
+Fetches entry number i of the heaps array of a hash table.
+@return	mem heap stored at index i */
+UNIV_INLINE
+mem_heap_t*
+hash_get_nth_heap(
+/*==============*/
+	hash_table_t*	table,	/*!< in: hash table, type must not be NONE */
+	ulint		i)	/*!< in: heap index, asserted < n_sync_obj */
+{
+	ut_ad(table);
+	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
+	ut_ad(table->type != HASH_TABLE_SYNC_NONE);
+	ut_ad(i < table->n_sync_obj);
+	mem_heap_t*	nth_heap = *(table->heaps + i);
+	return(nth_heap);
+}
+
+/************************************************************//**
+Chooses the mem heap that serves a fold value in a hash table.
+@return	table->heap if it is set, else the heap at the fold's sync index */
+UNIV_INLINE
+mem_heap_t*
+hash_get_heap(
+/*==========*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold)	/*!< in: fold; only used if table->heap is NULL */
+{
+	ut_ad(table);
+	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
+
+	/* A table with a single heap serves every fold from it. */
+	mem_heap_t*	single_heap = table->heap;
+	if (single_heap) {
+		return(single_heap);
+	}
+
+	/* Otherwise use the heap at the index of the fold's sync object. */
+	return(hash_get_nth_heap(table,
+				 hash_get_sync_obj_index(table, fold)));
+}
+
+/************************************************************//**
+Fetches the address of entry number i of the mutex array of a hash table.
+@return	pointer to mutex number i */
+UNIV_INLINE
+ib_mutex_t*
+hash_get_nth_mutex(
+/*===============*/
+	hash_table_t*	table,	/*!< in: hash table of type SYNC_MUTEX */
+	ulint		i)	/*!< in: mutex index, asserted < n_sync_obj */
+{
+	ut_ad(table);
+	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
+	ut_ad(table->type == HASH_TABLE_SYNC_MUTEX);
+	ut_ad(i < table->n_sync_obj);
+	ib_mutex_t*	nth_mutex = &table->sync_obj.mutexes[i];
+	return(nth_mutex);
+}
+
+/************************************************************//**
+Finds the mutex that covers a fold value in a hash table.
+@return	mutex at the sync object index of the fold */
+UNIV_INLINE
+ib_mutex_t*
+hash_get_mutex(
+/*===========*/
+	hash_table_t*	table,	/*!< in: hash table */
+	ulint		fold)	/*!< in: fold whose mutex is wanted */
+{
+	ut_ad(table);
+	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
+
+	/* Map the fold to a slot, then fetch the mutex in that slot. */
+	const ulint	mutex_no = hash_get_sync_obj_index(table, fold);
+	ib_mutex_t*	mutex = hash_get_nth_mutex(table, mutex_no);
+
+	return(mutex);
+}
+
+/************************************************************//**
+Fetches the address of entry number i of the rw_lock array of a hash table.
+@return	pointer to rw_lock number i */
+UNIV_INLINE
+rw_lock_t*
+hash_get_nth_lock(
+/*==============*/
+	hash_table_t*	table,	/*!< in: hash table of type SYNC_RW_LOCK */
+	ulint		i)	/*!< in: rw_lock index, asserted < n_sync_obj */
+{
+	ut_ad(table);
+	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
+	ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK);
+	ut_ad(i < table->n_sync_obj);
+	rw_lock_t*	nth_lock = &table->sync_obj.rw_locks[i];
+	return(nth_lock);
+}
+
+/************************************************************//**
+Finds the rw_lock that covers a fold value in a hash table.
+@return	rw_lock at the sync object index of the fold */
+UNIV_INLINE
+rw_lock_t*
+hash_get_lock(
+/*==========*/
+	hash_table_t*	table,	/*!< in: hash table of type SYNC_RW_LOCK */
+	ulint		fold)	/*!< in: fold whose rw_lock is wanted */
+{
+	ut_ad(table);
+	ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK);
+	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
+
+	/* Map the fold to a slot, then fetch the rw_lock in that slot. */
+	const ulint	lock_no = hash_get_sync_obj_index(table, fold);
+	rw_lock_t*	lock = hash_get_nth_lock(table, lock_no);
+
+	return(lock);
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/ibuf0ibuf.h b/storage/innobase/include/ibuf0ibuf.h
new file mode 100644
index 00000000000..9c3b686c998
--- /dev/null
+++ b/storage/innobase/include/ibuf0ibuf.h
@@ -0,0 +1,467 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/ibuf0ibuf.h
+Insert buffer
+
+Created 7/19/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef ibuf0ibuf_h
+#define ibuf0ibuf_h
+
+#include "univ.i"
+
+#include "mtr0mtr.h"
+#include "dict0mem.h"
+#include "fsp0fsp.h"
+
+#ifndef UNIV_HOTBACKUP
+# include "ibuf0types.h"
+
+/** Default value for maximum on-disk size of change buffer in terms
+of percentage of the buffer pool. */
+#define CHANGE_BUFFER_DEFAULT_SIZE	(25)
+
+/* Possible operations buffered in the insert (change) buffer. See
+ibuf_insert(). DO NOT CHANGE THE VALUES OF THESE, THEY ARE STORED ON DISK. */
+typedef enum {
+	IBUF_OP_INSERT = 0,
+	IBUF_OP_DELETE_MARK = 1,
+	IBUF_OP_DELETE = 2,
+
+	/* Number of different operation types. */
+	IBUF_OP_COUNT = 3
+} ibuf_op_t;
+
+/** Combinations of operations that can be buffered.  Because the enum
+values are used for indexing innobase_change_buffering_values[], they
+should start at 0 and there should not be any gaps. */
+typedef enum {
+	IBUF_USE_NONE = 0,	/* no operation is buffered */
+	IBUF_USE_INSERT,	/* insert */
+	IBUF_USE_DELETE_MARK,	/* delete */
+	IBUF_USE_INSERT_DELETE_MARK,	/* insert+delete */
+	IBUF_USE_DELETE,	/* delete+purge */
+	IBUF_USE_ALL,		/* insert+delete+purge */
+
+	IBUF_USE_COUNT		/* number of entries in ibuf_use_t */
+} ibuf_use_t;
+
+/** Operations that can currently be buffered. */
+extern ibuf_use_t	ibuf_use;
+
+/** The insert buffer control structure */
+extern ibuf_t*		ibuf;
+
+/* The purpose of the insert buffer is to reduce random disk access.
+When we wish to insert a record into a non-unique secondary index and
+the B-tree leaf page where the record belongs to is not in the buffer
+pool, we insert the record into the insert buffer B-tree, indexed by
+(space_id, page_no).  When the page is eventually read into the buffer
+pool, we look up the insert buffer B-tree for any modifications to the
+page, and apply these upon the completion of the read operation.  This
+is called the insert buffer merge. */
+
+/* The insert buffer merge must always succeed.  To guarantee this,
+the insert buffer subsystem keeps track of the free space in pages for
+which it can buffer operations.  Two bits per page in the insert
+buffer bitmap indicate the available space in coarse increments.  The
+free bits in the insert buffer bitmap must never exceed the free space
+on a page.  It is safe to decrement or reset the bits in the bitmap in
+a mini-transaction that is committed before the mini-transaction that
+affects the free space.  It is unsafe to increment the bits in a
+separately committed mini-transaction, because in crash recovery, the
+free bits could momentarily be set too high. */
+
+/******************************************************************//**
+Creates the insert buffer data structure at a database startup. */
+UNIV_INTERN
+void
+ibuf_init_at_db_start(void);
+/*=======================*/
+/*********************************************************************//**
+Updates the max_size value for ibuf. */
+UNIV_INTERN
+void
+ibuf_max_size_update(
+/*=================*/
+	ulint	new_val);	/*!< in: new value in terms of
+				percentage of the buffer pool size */
+/*********************************************************************//**
+Reads the biggest tablespace id from the high end of the insert buffer
+tree and updates the counter in fil_system. */
+UNIV_INTERN
+void
+ibuf_update_max_tablespace_id(void);
+/*===============================*/
+/***************************************************************//**
+Starts an insert buffer mini-transaction. */
+UNIV_INLINE
+void
+ibuf_mtr_start(
+/*===========*/
+	mtr_t*	mtr)	/*!< out: mini-transaction */
+	__attribute__((nonnull));
+/***************************************************************//**
+Commits an insert buffer mini-transaction. */
+UNIV_INLINE
+void
+ibuf_mtr_commit(
+/*============*/
+	mtr_t*	mtr)	/*!< in/out: mini-transaction */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Initializes an ibuf bitmap page. */
+UNIV_INTERN
+void
+ibuf_bitmap_page_init(
+/*==================*/
+	buf_block_t*	block,	/*!< in: bitmap page */
+	mtr_t*		mtr);	/*!< in: mtr */
+/************************************************************************//**
+Resets the free bits of the page in the ibuf bitmap. This is done in a
+separate mini-transaction, hence this operation does not restrict
+further work to only ibuf bitmap operations, which would result if the
+latch to the bitmap page were kept.  NOTE: The free bits in the insert
+buffer bitmap must never exceed the free space on a page.  It is safe
+to decrement or reset the bits in the bitmap in a mini-transaction
+that is committed before the mini-transaction that affects the free
+space. */
+UNIV_INTERN
+void
+ibuf_reset_free_bits(
+/*=================*/
+	buf_block_t*	block);	/*!< in: index page; free bits are set to 0
+				if the index is a non-clustered
+				non-unique, and page level is 0 */
+/************************************************************************//**
+Updates the free bits of an uncompressed page in the ibuf bitmap if
+there is not enough free space on the page any more.  This is done in a
+separate mini-transaction, hence this operation does not restrict
+further work to only ibuf bitmap operations, which would result if the
+latch to the bitmap page were kept.  NOTE: The free bits in the insert
+buffer bitmap must never exceed the free space on a page.  It is
+unsafe to increment the bits in a separately committed
+mini-transaction, because in crash recovery, the free bits could
+momentarily be set too high.  It is only safe to use this function for
+decrementing the free bits.  Should more free space become available,
+we must not update the free bits here, because that would break crash
+recovery. */
+UNIV_INLINE
+void
+ibuf_update_free_bits_if_full(
+/*==========================*/
+	buf_block_t*	block,	/*!< in: index page to which we have added new
+				records; the free bits are updated if the
+				index is non-clustered and non-unique and
+				the page level is 0, and the page becomes
+				fuller */
+	ulint		max_ins_size,/*!< in: value of maximum insert size with
+				reorganize before the latest operation
+				performed to the page */
+	ulint		increase);/*!< in: upper limit for the additional space
+				used in the latest operation, if known, or
+				ULINT_UNDEFINED */
+/**********************************************************************//**
+Updates the free bits for an uncompressed page to reflect the present
+state.  Does this in the mtr given, which means that the latching
+order rules virtually prevent any further operations for this OS
+thread until mtr is committed.  NOTE: The free bits in the insert
+buffer bitmap must never exceed the free space on a page.  It is safe
+to set the free bits in the same mini-transaction that updated the
+page. */
+UNIV_INTERN
+void
+ibuf_update_free_bits_low(
+/*======================*/
+	const buf_block_t*	block,		/*!< in: index page */
+	ulint			max_ins_size,	/*!< in: value of
+						maximum insert size
+						with reorganize before
+						the latest operation
+						performed to the page */
+	mtr_t*			mtr);		/*!< in/out: mtr */
+/**********************************************************************//**
+Updates the free bits for a compressed page to reflect the present
+state.  Does this in the mtr given, which means that the latching
+order rules virtually prevent any further operations for this OS
+thread until mtr is committed.  NOTE: The free bits in the insert
+buffer bitmap must never exceed the free space on a page.  It is safe
+to set the free bits in the same mini-transaction that updated the
+page. */
+UNIV_INTERN
+void
+ibuf_update_free_bits_zip(
+/*======================*/
+	buf_block_t*	block,	/*!< in/out: index page */
+	mtr_t*		mtr);	/*!< in/out: mtr */
+/**********************************************************************//**
+Updates the free bits for the two pages to reflect the present state.
+Does this in the mtr given, which means that the latching order rules
+virtually prevent any further operations until mtr is committed.
+NOTE: The free bits in the insert buffer bitmap must never exceed the
+free space on a page.  It is safe to set the free bits in the same
+mini-transaction that updated the pages. */
+UNIV_INTERN
+void
+ibuf_update_free_bits_for_two_pages_low(
+/*====================================*/
+	ulint		zip_size,/*!< in: compressed page size in bytes;
+				0 for uncompressed pages */
+	buf_block_t*	block1,	/*!< in: index page */
+	buf_block_t*	block2,	/*!< in: index page */
+	mtr_t*		mtr);	/*!< in: mtr */
+/**********************************************************************//**
+A basic partial test if an insert to the insert buffer could be possible and
+recommended. */
+UNIV_INLINE
+ibool
+ibuf_should_try(
+/*============*/
+	dict_index_t*	index,			/*!< in: index where to insert */
+	ulint		ignore_sec_unique);	/*!< in: if != 0, we should
+						ignore UNIQUE constraint on
+						a secondary index when we
+						decide */
+/******************************************************************//**
+Returns TRUE if the current OS thread is performing an insert buffer
+routine.
+
+For instance, a read-ahead of non-ibuf pages is forbidden by threads
+that are executing an insert buffer routine.
+@return TRUE if inside an insert buffer routine */
+UNIV_INLINE
+ibool
+ibuf_inside(
+/*========*/
+	const mtr_t*	mtr)	/*!< in: mini-transaction */
+	__attribute__((nonnull, pure));
+/***********************************************************************//**
+Checks if a page address is an ibuf bitmap page (level 3 page) address.
+@return	TRUE if a bitmap page */
+UNIV_INLINE
+ibool
+ibuf_bitmap_page(
+/*=============*/
+	ulint	zip_size,/*!< in: compressed page size in bytes;
+			0 for uncompressed pages */
+	ulint	page_no);/*!< in: page number */
+/***********************************************************************//**
+Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
+Must not be called when recv_no_ibuf_operations==TRUE.
+@return	TRUE if level 2 or level 3 page */
+UNIV_INTERN
+ibool
+ibuf_page_low(
+/*==========*/
+	ulint		space,	/*!< in: space id */
+	ulint		zip_size,/*!< in: compressed page size in bytes, or 0 */
+	ulint		page_no,/*!< in: page number */
+#ifdef UNIV_DEBUG
+	ibool		x_latch,/*!< in: FALSE if relaxed check
+				(avoid latching the bitmap page) */
+#endif /* UNIV_DEBUG */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
+	mtr_t*		mtr)	/*!< in: mtr which will contain an
+				x-latch to the bitmap page if the page
+				is not one of the fixed address ibuf
+				pages, or NULL, in which case a new
+				transaction is created. */
+	__attribute__((warn_unused_result));
+#ifdef UNIV_DEBUG
+/** Debug build: checks if a page is a level 2 or 3 page in the ibuf page
+hierarchy, with x_latch=TRUE.  Do not call if recv_no_ibuf_operations==TRUE.
+@param space	tablespace identifier
+@param zip_size	compressed page size in bytes, or 0
+@param page_no	page number
+@param mtr	mini-transaction or NULL
+@return TRUE if level 2 or level 3 page */
+# define ibuf_page(space, zip_size, page_no, mtr)			\
+	ibuf_page_low(space, zip_size, page_no, TRUE, __FILE__, __LINE__, mtr)
+#else /* UNIV_DEBUG */
+/** Non-debug build: checks if a page is a level 2 or 3 page in the ibuf
+page hierarchy.  Must not be called when recv_no_ibuf_operations==TRUE.
+@param space	tablespace identifier
+@param zip_size	compressed page size in bytes, or 0
+@param page_no	page number
+@param mtr	mini-transaction or NULL
+@return TRUE if level 2 or level 3 page */
+# define ibuf_page(space, zip_size, page_no, mtr)			\
+	ibuf_page_low(space, zip_size, page_no, __FILE__, __LINE__, mtr)
+#endif /* UNIV_DEBUG */
+/***********************************************************************//**
+Frees excess pages from the ibuf free list. This function is called when an OS
+thread calls fsp services to allocate a new file segment, or a new page to a
+file segment, and the thread did not own the fsp latch before this call. */
+UNIV_INTERN
+void
+ibuf_free_excess_pages(void);
+/*========================*/
+/*********************************************************************//**
+Buffer an operation in the insert/delete buffer, instead of doing it
+directly to the disk page, if this is possible. Does not do it if the index
+is clustered or unique.
+@return	TRUE if success */
+UNIV_INTERN
+ibool
+ibuf_insert(
+/*========*/
+	ibuf_op_t	op,	/*!< in: operation type */
+	const dtuple_t*	entry,	/*!< in: index entry to insert */
+	dict_index_t*	index,	/*!< in: index where to insert */
+	ulint		space,	/*!< in: space id where to insert */
+	ulint		zip_size,/*!< in: compressed page size in bytes, or 0 */
+	ulint		page_no,/*!< in: page number where to insert */
+	que_thr_t*	thr);	/*!< in: query thread */
+/*********************************************************************//**
+When an index page is read from a disk to the buffer pool, this function
+applies any buffered operations to the page and deletes the entries from the
+insert buffer. If the page is not read, but created in the buffer pool, this
+function deletes its buffered entries from the insert buffer; there can
+exist entries for such a page if the page belonged to an index which
+subsequently was dropped. */
+UNIV_INTERN
+void
+ibuf_merge_or_delete_for_page(
+/*==========================*/
+	buf_block_t*	block,	/*!< in: if page has been read from
+				disk, pointer to the page x-latched,
+				else NULL */
+	ulint		space,	/*!< in: space id of the index page */
+	ulint		page_no,/*!< in: page number of the index page */
+	ulint		zip_size,/*!< in: compressed page size in bytes,
+				or 0 */
+	ibool		update_ibuf_bitmap);/*!< in: normally this is set
+				to TRUE, but if we have deleted or are
+				deleting the tablespace, then we
+				naturally do not want to update a
+				non-existent bitmap page */
+/*********************************************************************//**
+Deletes all entries in the insert buffer for a given space id. This is used
+in DISCARD TABLESPACE and IMPORT TABLESPACE.
+NOTE: this does not update the page free bitmaps in the space. The space will
+become CORRUPT when you call this function! */
+UNIV_INTERN
+void
+ibuf_delete_for_discarded_space(
+/*============================*/
+	ulint	space);	/*!< in: space id */
+/*********************************************************************//**
+Contracts insert buffer trees by reading pages to the buffer pool.
+@return a lower limit for the combined size in bytes of entries which
+will be merged from ibuf trees to the pages read, 0 if ibuf is
+empty */
+UNIV_INTERN
+ulint
+ibuf_contract_in_background(
+/*========================*/
+	table_id_t	table_id,	/*!< in: if merge should be done only
+					for a specific table, for all tables
+					this should be 0 */
+	ibool		full);		/*!< in: TRUE if the caller wants to
+					do a full contract based on PCT_IO(100).
+					If FALSE then the size of contract
+					batch is determined based on the
+					current size of the ibuf tree. */
+#endif /* !UNIV_HOTBACKUP */
+/*********************************************************************//**
+Parses a redo log record of an ibuf bitmap page init.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+ibuf_parse_bitmap_init(
+/*===================*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	buf_block_t*	block,	/*!< in: block or NULL */
+	mtr_t*		mtr);	/*!< in: mtr or NULL */
+#ifndef UNIV_HOTBACKUP
+#ifdef UNIV_IBUF_COUNT_DEBUG
+/******************************************************************//**
+Gets the ibuf count for a given page.
+@return number of entries in the insert buffer currently buffered for
+this page */
+UNIV_INTERN
+ulint
+ibuf_count_get(
+/*===========*/
+	ulint	space,	/*!< in: space id */
+	ulint	page_no);/*!< in: page number */
+#endif
+/******************************************************************//**
+Looks if the insert buffer is empty.
+@return	true if empty */
+UNIV_INTERN
+bool
+ibuf_is_empty(void);
+/*===============*/
+/******************************************************************//**
+Prints info of ibuf. */
+UNIV_INTERN
+void
+ibuf_print(
+/*=======*/
+	FILE*	file);	/*!< in: file where to print */
+/******************************************************************//**
+Read the first two bytes from a record's fourth field (counter field in new
+records; something else in older records).
+@return	"counter" field, or ULINT_UNDEFINED if for some reason it can't be read */
+UNIV_INTERN
+ulint
+ibuf_rec_get_counter(
+/*=================*/
+	const rec_t*	rec);	/*!< in: ibuf record */
+/******************************************************************//**
+Closes insert buffer and frees the data structures. */
+UNIV_INTERN
+void
+ibuf_close(void);
+/*============*/
+
+/******************************************************************//**
+Checks the insert buffer bitmaps on IMPORT TABLESPACE.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+ibuf_check_bitmap_on_import(
+/*========================*/
+	const trx_t*	trx,		/*!< in: transaction */
+	ulint		space_id)	/*!< in: tablespace identifier */
+	__attribute__((nonnull, warn_unused_result));
+
+#define IBUF_HEADER_PAGE_NO	FSP_IBUF_HEADER_PAGE_NO
+#define IBUF_TREE_ROOT_PAGE_NO	FSP_IBUF_TREE_ROOT_PAGE_NO
+
+#endif /* !UNIV_HOTBACKUP */
+
+/* The ibuf header page currently contains only the file segment header
+for the file segment from which the pages for the ibuf tree are allocated */
+#define IBUF_HEADER		PAGE_DATA
+#define	IBUF_TREE_SEG_HEADER	0	/* fseg header for ibuf tree */
+
+/* The insert buffer tree itself is always located in space 0. */
+#define IBUF_SPACE_ID		0
+
+#ifndef UNIV_NONINL
+#include "ibuf0ibuf.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/ibuf0ibuf.ic b/storage/innobase/include/ibuf0ibuf.ic
new file mode 100644
index 00000000000..21747fdceac
--- /dev/null
+++ b/storage/innobase/include/ibuf0ibuf.ic
@@ -0,0 +1,367 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/ibuf0ibuf.ic
+Insert buffer
+
+Created 7/19/1997 Heikki Tuuri
+*******************************************************/
+
+#include "page0page.h"
+#include "page0zip.h"
+#ifndef UNIV_HOTBACKUP
+#include "buf0lru.h"
+
+/** Divisor defining the unit of the ibuf bitmap free bits: an index page
+needs at least UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE bytes of free
+space (zip_size / IBUF_PAGE_SIZE_PER_FREE_SPACE on a compressed page) for
+ibuf to try to buffer inserts to it; the bitmap bits are then nonzero. */
+#define IBUF_PAGE_SIZE_PER_FREE_SPACE	32
+
+/***************************************************************//**
+Starts a mini-transaction and marks it as an insert buffer one. */
+UNIV_INLINE
+void
+ibuf_mtr_start(
+/*===========*/
+	mtr_t*	mtr)	/*!< out: mini-transaction */
+{
+	mtr_start(mtr);
+	mtr->inside_ibuf = TRUE;	/* read by ibuf_inside() */
+}
+/***************************************************************//**
+Commits an insert buffer mini-transaction (inside_ibuf must be set). */
+UNIV_INLINE
+void
+ibuf_mtr_commit(
+/*============*/
+	mtr_t*	mtr)	/*!< in/out: mini-transaction */
+{
+	ut_ad(mtr->inside_ibuf);	/* must be flagged as an ibuf mtr */
+	ut_d(mtr->inside_ibuf = FALSE);	/* flag reset wrapped in ut_d() */
+	mtr_commit(mtr);
+}
+
+/** Insert buffer struct: ibuf tree sizes, state and merge statistics */
+struct ibuf_t{
+	ulint		size;		/*!< current size of the ibuf index
+					tree, in pages */
+	ulint		max_size;	/*!< recommended maximum size of the
+					ibuf index tree, in pages */
+	ulint		seg_size;	/*!< allocated pages of the file
+					segment containing ibuf header and
+					tree */
+	bool		empty;		/*!< Protected by the page
+					latch of the root page of the
+					insert buffer tree
+					(FSP_IBUF_TREE_ROOT_PAGE_NO). true
+					if and only if the insert
+					buffer tree is empty. */
+	ulint		free_list_len;	/*!< length of the free list */
+	ulint		height;		/*!< tree height */
+	dict_index_t*	index;		/*!< insert buffer index */
+
+	ulint		n_merges;	/*!< number of pages merged */
+	ulint		n_merged_ops[IBUF_OP_COUNT];
+					/*!< number of operations of each type
+					merged to index pages */
+	ulint		n_discarded_ops[IBUF_OP_COUNT];
+					/*!< number of operations of each type
+					discarded without merging due to the
+					tablespace being deleted or the
+					index being dropped */
+};
+
+/************************************************************************//**
+Sets the free bits of the page in the ibuf bitmap. This is done in a separate
+mini-transaction, hence this operation does not restrict further work to only
+ibuf bitmap operations, which would result if the latch to the bitmap page
+were kept. ibuf_set_free_bits() drops max unless UNIV_IBUF_DEBUG is set. */
+UNIV_INTERN
+void
+ibuf_set_free_bits_func(
+/*====================*/
+	buf_block_t*	block,	/*!< in: index page of a non-clustered index;
+				free bit is reset if page level is 0 */
+#ifdef UNIV_IBUF_DEBUG
+	ulint		max_val,/*!< in: ULINT_UNDEFINED or a maximum
+				value which the bits must have before
+				setting; this is for debugging */
+#endif /* UNIV_IBUF_DEBUG */
+	ulint		val);	/*!< in: value to set: < 4 */
+#ifdef UNIV_IBUF_DEBUG
+# define ibuf_set_free_bits(b,v,max) ibuf_set_free_bits_func(b,max,v)
+#else /* UNIV_IBUF_DEBUG */
+# define ibuf_set_free_bits(b,v,max) ibuf_set_free_bits_func(b,v)
+#endif /* UNIV_IBUF_DEBUG */
+
+/**********************************************************************//**
+A basic partial test if an insert to the insert buffer could be possible and
+recommended.  @return nonzero if all of the checks below pass */
+UNIV_INLINE
+ibool
+ibuf_should_try(
+/*============*/
+	dict_index_t*	index,			/*!< in: index where to insert */
+	ulint		ignore_sec_unique)	/*!< in: if != 0, we should
+						ignore UNIQUE constraint on
+						a secondary index when we
+						decide */
+{
+	return(ibuf_use != IBUF_USE_NONE	/* some buffering mode is on */
+	       && ibuf->max_size != 0		/* nonzero max tree size */
+	       && !dict_index_is_clust(index)	/* secondary indexes only */
+	       && index->table->quiesce == QUIESCE_NONE /* not quiescing */
+	       && (ignore_sec_unique || !dict_index_is_unique(index)));
+}
+
+/******************************************************************//**
+Returns TRUE if the current OS thread is performing an insert buffer
+routine, as recorded in the inside_ibuf flag of the mini-transaction.
+
+For instance, a read-ahead of non-ibuf pages is forbidden by threads
+that are executing an insert buffer routine.
+@return TRUE if inside an insert buffer routine */
+UNIV_INLINE
+ibool
+ibuf_inside(
+/*========*/
+	const mtr_t*	mtr)	/*!< in: mini-transaction */
+{
+	return(mtr->inside_ibuf);	/* set by ibuf_mtr_start() */
+}
+
+/***********************************************************************//**
+Checks if page_no, masked with (page size - 1), is FSP_IBUF_BITMAP_OFFSET.
+@return	TRUE if a bitmap page */
+UNIV_INLINE
+ibool
+ibuf_bitmap_page(
+/*=============*/
+	ulint	zip_size,/*!< in: compressed page size in bytes;
+			0 for uncompressed pages */
+	ulint	page_no)/*!< in: page number */
+{
+	ut_ad(ut_is_2pow(zip_size));	/* (zip_size - 1) is used as a mask */
+
+	if (!zip_size) {	/* uncompressed: mask with UNIV_PAGE_SIZE - 1 */
+		return((page_no & (UNIV_PAGE_SIZE - 1))
+			== FSP_IBUF_BITMAP_OFFSET);
+	}
+
+	return((page_no & (zip_size - 1)) == FSP_IBUF_BITMAP_OFFSET);
+}
+
+/*********************************************************************//**
+Translates the free space on a page to a value in the ibuf bitmap.
+@return	value for ibuf bitmap bits, in the range 0..3 */
+UNIV_INLINE
+ulint
+ibuf_index_page_calc_free_bits(
+/*===========================*/
+	ulint	zip_size,	/*!< in: compressed page size in bytes;
+				0 for uncompressed pages */
+	ulint	max_ins_size)	/*!< in: maximum insert size after reorganize
+				for the page */
+{
+	ulint	n;	/* free space in (page size / 32)-byte units */
+	ut_ad(ut_is_2pow(zip_size));
+	ut_ad(!zip_size || zip_size > IBUF_PAGE_SIZE_PER_FREE_SPACE);
+	ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
+
+	if (zip_size) {		/* compressed: unit is based on zip_size */
+		n = max_ins_size
+			/ (zip_size / IBUF_PAGE_SIZE_PER_FREE_SPACE);
+	} else {		/* uncompressed: based on UNIV_PAGE_SIZE */
+		n = max_ins_size
+			/ (UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
+	}
+
+	if (n == 3) {		/* exactly 3 units are reported as 2 */
+		n = 2;
+	}
+
+	if (n > 3) {		/* 4 or more units: the maximum value 3 */
+		n = 3;
+	}
+
+	return(n);
+}
+
+/*********************************************************************//**
+Translates the ibuf free bits to free space in bytes (bits 3 means 4 units).
+@return	maximum insert size after reorganize for the page */
+UNIV_INLINE
+ulint
+ibuf_index_page_calc_free_from_bits(
+/*================================*/
+	ulint	zip_size,/*!< in: compressed page size in bytes;
+			0 for uncompressed pages */
+	ulint	bits)	/*!< in: value for ibuf bitmap bits */
+{
+	ut_ad(bits < 4);
+	ut_ad(ut_is_2pow(zip_size));
+	ut_ad(!zip_size || zip_size > IBUF_PAGE_SIZE_PER_FREE_SPACE);
+	ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
+
+	if (zip_size) {		/* compressed: unit is zip_size / 32 */
+		if (bits == 3) {	/* 3 stands for 4 units */
+			return(4 * zip_size / IBUF_PAGE_SIZE_PER_FREE_SPACE);
+		}
+
+		return(bits * zip_size / IBUF_PAGE_SIZE_PER_FREE_SPACE);
+	}
+
+	if (bits == 3) {	/* 3 stands for 4 units */
+		return(4 * UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
+	}
+
+	return(bits * (UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE));
+}
+
+/*********************************************************************//**
+Translates the free space on a compressed page to a value in the ibuf bitmap.
+@return	value for ibuf bitmap bits; 0 if page_zip_max_ins_size() is negative */
+UNIV_INLINE
+ulint
+ibuf_index_page_calc_free_zip(
+/*==========================*/
+	ulint			zip_size,
+					/*!< in: compressed page size in bytes */
+	const buf_block_t*	block)	/*!< in: buffer block */
+{
+	ulint			max_ins_size;
+	const page_zip_des_t*	page_zip;
+	lint			zip_max_ins;	/* signed: may be negative */
+
+	ut_ad(zip_size == buf_block_get_zip_size(block));
+	ut_ad(zip_size);
+
+	/* Consider the maximum insert size on the uncompressed page
+	without reorganizing the page. We must not assume anything
+	about the compression ratio. If zip_max_ins > max_ins_size and
+	there is 1/4 garbage on the page, recompression after the
+	reorganize could fail, in theory. So, let us guarantee that
+	merging a buffered insert to a compressed page will always
+	succeed without reorganizing or recompressing the page, just
+	by using the page modification log. */
+	max_ins_size = page_get_max_insert_size(
+		buf_block_get_frame(block), 1);
+
+	page_zip = buf_block_get_page_zip(block);
+	zip_max_ins = page_zip_max_ins_size(page_zip,
+					    FALSE/* not clustered */);
+
+	if (zip_max_ins < 0) {	/* negative limit: report no free space */
+		return(0);
+	} else if (max_ins_size > (ulint) zip_max_ins) {
+		max_ins_size = (ulint) zip_max_ins;	/* use the minimum */
+	}
+
+	return(ibuf_index_page_calc_free_bits(zip_size, max_ins_size));
+}
+
+/*********************************************************************//**
+Translates the free space on a page, compressed or not, to ibuf bitmap bits.
+@return	value for ibuf bitmap bits */
+UNIV_INLINE
+ulint
+ibuf_index_page_calc_free(
+/*======================*/
+	ulint			zip_size,/*!< in: compressed page size in bytes;
+					0 for uncompressed pages */
+	const buf_block_t*	block)	/*!< in: buffer block */
+{
+	ut_ad(zip_size == buf_block_get_zip_size(block));
+
+	if (!zip_size) {	/* uncompressed: size after reorganize */
+		ulint	max_ins_size;
+
+		max_ins_size = page_get_max_insert_size_after_reorganize(
+			buf_block_get_frame(block), 1);
+
+		return(ibuf_index_page_calc_free_bits(0, max_ins_size));
+	} else {		/* compressed: delegate to the zip variant */
+		return(ibuf_index_page_calc_free_zip(zip_size, block));
+	}
+}
+
+/************************************************************************//**
+Updates the free bits of an uncompressed page in the ibuf bitmap if
+there is not enough free space on the page any more.  This is done in a
+separate mini-transaction, hence this operation does not restrict
+further work to only ibuf bitmap operations, which would result if the
+latch to the bitmap page were kept.  NOTE: The free bits in the insert
+buffer bitmap must never exceed the free space on a page.  It is
+unsafe to increment the bits in a separately committed
+mini-transaction, because in crash recovery, the free bits could
+momentarily be set too high.  It is only safe to use this function for
+decrementing the free bits.  Should more free space become available,
+we must not update the free bits here, because that would break crash
+recovery. */
+UNIV_INLINE
+void
+ibuf_update_free_bits_if_full(
+/*==========================*/
+	buf_block_t*	block,	/*!< in: index page to which we have added new
+				records; the free bits are updated if the
+				index is non-clustered and non-unique and
+				the page level is 0, and the page becomes
+				fuller */
+	ulint		max_ins_size,/*!< in: value of maximum insert size with
+				reorganize before the latest operation
+				performed to the page */
+	ulint		increase)/*!< in: upper limit for the additional space
+				used in the latest operation, if known, or
+				ULINT_UNDEFINED */
+{
+	ulint	before;	/* free bits computed from max_ins_size */
+	ulint	after;	/* free bits after the latest operation */
+
+	ut_ad(!buf_block_get_page_zip(block));	/* uncompressed pages only */
+
+	before = ibuf_index_page_calc_free_bits(0, max_ins_size);
+
+	if (max_ins_size >= increase) {
+#if ULINT32_UNDEFINED <= UNIV_PAGE_SIZE_MAX
+# error "ULINT32_UNDEFINED <= UNIV_PAGE_SIZE_MAX"
+#endif
+		after = ibuf_index_page_calc_free_bits(0, max_ins_size
+						       - increase);
+#ifdef UNIV_IBUF_DEBUG
+		ut_a(after <= ibuf_index_page_calc_free(0, block));
+#endif
+	} else {	/* increase > max_ins_size: recompute from page */
+		after = ibuf_index_page_calc_free(0, block);
+	}
+
+	if (after == 0) {
+		/* We move the page to the front of the buffer pool LRU list:
+		the purpose of this is to prevent those pages to which we
+		cannot make inserts using the insert buffer from slipping
+		out of the buffer pool */
+
+		buf_page_make_young(&block->page);
+	}
+
+	if (before > after) {	/* the bits are only ever lowered here */
+		ibuf_set_free_bits(block, after, before);
+	}
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/ibuf0types.h b/storage/innobase/include/ibuf0types.h
new file mode 100644
index 00000000000..3fdbf078b0b
--- /dev/null
+++ b/storage/innobase/include/ibuf0types.h
@@ -0,0 +1,31 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/ibuf0types.h
+Insert buffer global types
+
+Created 7/29/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef ibuf0types_h
+#define ibuf0types_h
+
+struct ibuf_t;
+
+#endif
diff --git a/storage/innobase/include/lock0iter.h b/storage/innobase/include/lock0iter.h
new file mode 100644
index 00000000000..0054850b526
--- /dev/null
+++ b/storage/innobase/include/lock0iter.h
@@ -0,0 +1,69 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/lock0iter.h
+Lock queue iterator type and function prototypes.
+
+Created July 16, 2007 Vasil Dimov
+*******************************************************/
+
+#ifndef lock0iter_h
+#define lock0iter_h
+
+#include "univ.i"
+#include "lock0types.h"
+
+struct lock_queue_iterator_t {	/* lock queue iterator state */
+	const lock_t*	current_lock;	/* lock the iterator is currently at */
+	/* In case this is a record lock queue (not table lock queue)
+	then bit_no is the record number within the heap in which the
+	record is stored. */
+	ulint		bit_no;
+};
+
+/*******************************************************************//**
+Initialize lock queue iterator so that it starts to iterate from
+"lock". bit_no specifies the record number within the heap where the
+record is stored. It can be undefined (ULINT_UNDEFINED) in two cases:
+1. If the lock is a table lock, thus we have a table lock queue;
+2. If the lock is a record lock and it is a wait lock. In this case
+   bit_no is calculated in this function by using
+   lock_rec_find_set_bit(). There is exactly one bit set in the bitmap
+   of a wait lock. */
+UNIV_INTERN
+void
+lock_queue_iterator_reset(
+/*======================*/
+	lock_queue_iterator_t*	iter,	/*!< out: iterator */
+	const lock_t*		lock,	/*!< in: lock to start from */
+	ulint			bit_no);/*!< in: record number in the
+					heap */
+
+/*******************************************************************//**
+Gets the previous lock in the lock queue, or returns NULL if there are no
+more locks (i.e. the current lock is the first one). The iterator is
+stepped back (if a non-NULL lock is returned).
+@return	previous lock or NULL */
+
+const lock_t*
+lock_queue_iterator_get_prev(
+/*=========================*/
+	lock_queue_iterator_t*	iter);	/*!< in/out: iterator */
+
+#endif /* lock0iter_h */
diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h
new file mode 100644
index 00000000000..6d5ed35d5d8
--- /dev/null
+++ b/storage/innobase/include/lock0lock.h
@@ -0,0 +1,979 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/lock0lock.h
+The transaction lock system
+
+Created 5/7/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef lock0lock_h
+#define lock0lock_h
+
+#include "univ.i"
+#include "buf0types.h"
+#include "trx0types.h"
+#include "mtr0types.h"
+#include "rem0types.h"
+#include "dict0types.h"
+#include "que0types.h"
+#include "lock0types.h"
+#include "read0types.h"
+#include "hash0hash.h"
+#include "srv0srv.h"
+#include "ut0vec.h"
+
+#ifdef UNIV_DEBUG
+extern ibool	lock_print_waits;
+#endif /* UNIV_DEBUG */
+
+/*********************************************************************//**
+Gets the size of a lock struct.
+@return	size in bytes */
+UNIV_INTERN
+ulint
+lock_get_size(void);
+/*===============*/
+/*********************************************************************//**
+Creates the lock system at database start. */
+UNIV_INTERN
+void
+lock_sys_create(
+/*============*/
+	ulint	n_cells);	/*!< in: number of slots in lock hash table */
+/*********************************************************************//**
+Closes the lock system at database shutdown. */
+UNIV_INTERN
+void
+lock_sys_close(void);
+/*================*/
+/*********************************************************************//**
+Gets the heap_no of the smallest user record on a page.
+@return	heap_no of smallest user record, or PAGE_HEAP_NO_SUPREMUM */
+UNIV_INLINE
+ulint
+lock_get_min_heap_no(
+/*=================*/
+	const buf_block_t*	block);	/*!< in: buffer block */
+/*************************************************************//**
+Updates the lock table when we have reorganized a page. NOTE: we copy
+also the locks set on the infimum of the page; the infimum may carry
+locks if an update of a record is occurring on the page, and its locks
+were temporarily stored on the infimum. */
+UNIV_INTERN
+void
+lock_move_reorganize_page(
+/*======================*/
+	const buf_block_t*	block,	/*!< in: old index page, now
+					reorganized */
+	const buf_block_t*	oblock);/*!< in: copy of the old, not
+					reorganized page */
+/*************************************************************//**
+Moves the explicit locks on user records to another page if a record
+list end is moved to another page. */
+UNIV_INTERN
+void
+lock_move_rec_list_end(
+/*===================*/
+	const buf_block_t*	new_block,	/*!< in: index page to move to */
+	const buf_block_t*	block,		/*!< in: index page */
+	const rec_t*		rec);		/*!< in: record on page: this
+						is the first record moved */
+/*************************************************************//**
+Moves the explicit locks on user records to another page if a record
+list start is moved to another page. */
+UNIV_INTERN
+void
+lock_move_rec_list_start(
+/*=====================*/
+	const buf_block_t*	new_block,	/*!< in: index page to move to */
+	const buf_block_t*	block,		/*!< in: index page */
+	const rec_t*		rec,		/*!< in: record on page:
+						this is the first
+						record NOT copied */
+	const rec_t*		old_end);	/*!< in: old
+						previous-to-last
+						record on new_page
+						before the records
+						were copied */
+/*************************************************************//**
+Updates the lock table when a page is split to the right. */
+UNIV_INTERN
+void
+lock_update_split_right(
+/*====================*/
+	const buf_block_t*	right_block,	/*!< in: right page */
+	const buf_block_t*	left_block);	/*!< in: left page */
+/*************************************************************//**
+Updates the lock table when a page is merged to the right. */
+UNIV_INTERN
+void
+lock_update_merge_right(
+/*====================*/
+	const buf_block_t*	right_block,	/*!< in: right page to
+						which merged */
+	const rec_t*		orig_succ,	/*!< in: original
+						successor of infimum
+						on the right page
+						before merge */
+	const buf_block_t*	left_block);	/*!< in: merged index
+						page which will be
+						discarded */
+/*************************************************************//**
+Updates the lock table when the root page is copied to another in
+btr_root_raise_and_insert. Note that we leave lock structs on the
+root page, even though they do not make sense on other than leaf
+pages: the reason is that in a pessimistic update the infimum record
+of the root page will act as a dummy carrier of the locks of the record
+to be updated. */
+UNIV_INTERN
+void
+lock_update_root_raise(
+/*===================*/
+	const buf_block_t*	block,	/*!< in: index page to which copied */
+	const buf_block_t*	root);	/*!< in: root page */
+/*************************************************************//**
+Updates the lock table when a page is copied to another and the original page
+is removed from the chain of leaf pages, except if page is the root! */
+UNIV_INTERN
+void
+lock_update_copy_and_discard(
+/*=========================*/
+	const buf_block_t*	new_block,	/*!< in: index page to
+						which copied */
+	const buf_block_t*	block);		/*!< in: index page;
+						NOT the root! */
+/*************************************************************//**
+Updates the lock table when a page is split to the left. */
+UNIV_INTERN
+void
+lock_update_split_left(
+/*===================*/
+	const buf_block_t*	right_block,	/*!< in: right page */
+	const buf_block_t*	left_block);	/*!< in: left page */
+/*************************************************************//**
+Updates the lock table when a page is merged to the left. */
+UNIV_INTERN
+void
+lock_update_merge_left(
+/*===================*/
+	const buf_block_t*	left_block,	/*!< in: left page to
+						which merged */
+	const rec_t*		orig_pred,	/*!< in: original predecessor
+						of supremum on the left page
+						before merge */
+	const buf_block_t*	right_block);	/*!< in: merged index page
+						which will be discarded */
+/*************************************************************//**
+Resets the original locks on heir and replaces them with gap type locks
+inherited from rec. */
+UNIV_INTERN
+void
+lock_rec_reset_and_inherit_gap_locks(
+/*=================================*/
+	const buf_block_t*	heir_block,	/*!< in: block containing the
+						record which inherits */
+	const buf_block_t*	block,		/*!< in: block containing the
+						record from which inherited;
+						does NOT reset the locks on
+						this record */
+	ulint			heir_heap_no,	/*!< in: heap_no of the
+						inheriting record */
+	ulint			heap_no);	/*!< in: heap_no of the
+						donating record */
+/*************************************************************//**
+Updates the lock table when a page is discarded. */
+UNIV_INTERN
+void
+lock_update_discard(
+/*================*/
+	const buf_block_t*	heir_block,	/*!< in: index page
+						which will inherit the locks */
+	ulint			heir_heap_no,	/*!< in: heap_no of the record
+						which will inherit the locks */
+	const buf_block_t*	block);		/*!< in: index page
+						which will be discarded */
+/*************************************************************//**
+Updates the lock table when a new user record is inserted. */
+UNIV_INTERN
+void
+lock_update_insert(
+/*===============*/
+	const buf_block_t*	block,	/*!< in: buffer block containing rec */
+	const rec_t*		rec);	/*!< in: the inserted record */
+/*************************************************************//**
+Updates the lock table when a record is removed. */
+UNIV_INTERN
+void
+lock_update_delete(
+/*===============*/
+	const buf_block_t*	block,	/*!< in: buffer block containing rec */
+	const rec_t*		rec);	/*!< in: the record to be removed */
+/*********************************************************************//**
+Stores on the page infimum record the explicit locks of another record.
+This function is used to store the lock state of a record when it is
+updated and the size of the record changes in the update. The record
+is in such an update moved, perhaps to another page. The infimum record
+acts as a dummy carrier record, taking care of lock releases while the
+actual record is being moved. */
+UNIV_INTERN
+void
+lock_rec_store_on_page_infimum(
+/*===========================*/
+	const buf_block_t*	block,	/*!< in: buffer block containing rec */
+	const rec_t*		rec);	/*!< in: record whose lock state
+					is stored on the infimum
+					record of the same page; lock
+					bits are reset on the
+					record */
+/*********************************************************************//**
+Restores the state of explicit lock requests on a single record, where the
+state was stored on the infimum of the page. */
+UNIV_INTERN
+void
+lock_rec_restore_from_page_infimum(
+/*===============================*/
+	const buf_block_t*	block,	/*!< in: buffer block containing rec */
+	const rec_t*		rec,	/*!< in: record whose lock state
+					is restored */
+	const buf_block_t*	donator);/*!< in: page (rec is not
+					necessarily on this page)
+					whose infimum stored the lock
+					state; lock bits are reset on
+					the infimum */
+/*********************************************************************//**
+Determines if there are explicit record locks on a page.
+@return	an explicit record lock on the page, or NULL if there are none */
+UNIV_INTERN
+lock_t*
+lock_rec_expl_exist_on_page(
+/*========================*/
+	ulint	space,	/*!< in: space id */
+	ulint	page_no)/*!< in: page number */
+	__attribute__((warn_unused_result));
+/*********************************************************************//**
+Checks if locks of other transactions prevent an immediate insert of
+a record. If they do, first tests if the query thread should anyway
+be suspended for some reason; if not, then puts the transaction and
+the query thread to the lock wait state and inserts a waiting request
+for a gap x-lock to the lock queue.
+@return	DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
+dberr_t
+lock_rec_insert_check_and_lock(
+/*===========================*/
+	ulint		flags,	/*!< in: if BTR_NO_LOCKING_FLAG bit is
+				set, does nothing */
+	const rec_t*	rec,	/*!< in: record after which to insert */
+	buf_block_t*	block,	/*!< in/out: buffer block of rec */
+	dict_index_t*	index,	/*!< in: index */
+	que_thr_t*	thr,	/*!< in: query thread */
+	mtr_t*		mtr,	/*!< in/out: mini-transaction */
+	ibool*		inherit)/*!< out: set to TRUE if the new
+				inserted record maybe should inherit
+				LOCK_GAP type locks from the successor
+				record */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Checks if locks of other transactions prevent an immediate modify (update,
+delete mark, or delete unmark) of a clustered index record. If they do,
+first tests if the query thread should anyway be suspended for some
+reason; if not, then puts the transaction and the query thread to the
+lock wait state and inserts a waiting request for a record x-lock to the
+lock queue.
+@return	DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
+dberr_t
+lock_clust_rec_modify_check_and_lock(
+/*=================================*/
+	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
+					bit is set, does nothing */
+	const buf_block_t*	block,	/*!< in: buffer block of rec */
+	const rec_t*		rec,	/*!< in: record which should be
+					modified */
+	dict_index_t*		index,	/*!< in: clustered index */
+	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
+	que_thr_t*		thr)	/*!< in: query thread */
+	__attribute__((warn_unused_result, nonnull));
+/*********************************************************************//**
+Checks if locks of other transactions prevent an immediate modify
+(delete mark or delete unmark) of a secondary index record.
+@return	DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
+dberr_t
+lock_sec_rec_modify_check_and_lock(
+/*===============================*/
+	ulint		flags,	/*!< in: if BTR_NO_LOCKING_FLAG
+				bit is set, does nothing */
+	buf_block_t*	block,	/*!< in/out: buffer block of rec */
+	const rec_t*	rec,	/*!< in: record which should be
+				modified; NOTE: as this is a secondary
+				index, we always have to modify the
+				clustered index record first: see the
+				comment below */
+	dict_index_t*	index,	/*!< in: secondary index */
+	que_thr_t*	thr,	/*!< in: query thread
+				(can be NULL if BTR_NO_LOCKING_FLAG) */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+	__attribute__((warn_unused_result, nonnull(2,3,4,6)));
+/*********************************************************************//**
+Like lock_clust_rec_read_check_and_lock(), but reads a
+secondary index record.
+@return	DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
+or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
+dberr_t
+lock_sec_rec_read_check_and_lock(
+/*=============================*/
+	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
+					bit is set, does nothing */
+	const buf_block_t*	block,	/*!< in: buffer block of rec */
+	const rec_t*		rec,	/*!< in: user record or page
+					supremum record which should
+					be read or passed over by a
+					read cursor */
+	dict_index_t*		index,	/*!< in: secondary index */
+	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
+	enum lock_mode		mode,	/*!< in: mode of the lock which
+					the read cursor should set on
+					records: LOCK_S or LOCK_X; the
+					latter is possible in
+					SELECT FOR UPDATE */
+	ulint			gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
+					LOCK_REC_NOT_GAP */
+	que_thr_t*		thr);	/*!< in: query thread */
+/*********************************************************************//**
+Checks if locks of other transactions prevent an immediate read, or passing
+over by a read cursor, of a clustered index record. If they do, first tests
+if the query thread should anyway be suspended for some reason; if not, then
+puts the transaction and the query thread to the lock wait state and inserts a
+waiting request for a record lock to the lock queue. Sets the requested mode
+lock on the record.
+@return	DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
+or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
+dberr_t
+lock_clust_rec_read_check_and_lock(
+/*===============================*/
+	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
+					bit is set, does nothing */
+	const buf_block_t*	block,	/*!< in: buffer block of rec */
+	const rec_t*		rec,	/*!< in: user record or page
+					supremum record which should
+					be read or passed over by a
+					read cursor */
+	dict_index_t*		index,	/*!< in: clustered index */
+	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
+	enum lock_mode		mode,	/*!< in: mode of the lock which
+					the read cursor should set on
+					records: LOCK_S or LOCK_X; the
+					latter is possible in
+					SELECT FOR UPDATE */
+	ulint			gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
+					LOCK_REC_NOT_GAP */
+	que_thr_t*		thr);	/*!< in: query thread */
+/*********************************************************************//**
+Checks if locks of other transactions prevent an immediate read, or passing
+over by a read cursor, of a clustered index record. If they do, first tests
+if the query thread should anyway be suspended for some reason; if not, then
+puts the transaction and the query thread to the lock wait state and inserts a
+waiting request for a record lock to the lock queue. Sets the requested mode
+lock on the record. This is an alternative version of
+lock_clust_rec_read_check_and_lock() that does not require the parameter
+"offsets".
+@return	DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
+dberr_t
+lock_clust_rec_read_check_and_lock_alt(
+/*===================================*/
+	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
+					bit is set, does nothing */
+	const buf_block_t*	block,	/*!< in: buffer block of rec */
+	const rec_t*		rec,	/*!< in: user record or page
+					supremum record which should
+					be read or passed over by a
+					read cursor */
+	dict_index_t*		index,	/*!< in: clustered index */
+	enum lock_mode		mode,	/*!< in: mode of the lock which
+					the read cursor should set on
+					records: LOCK_S or LOCK_X; the
+					latter is possible in
+					SELECT FOR UPDATE */
+	ulint			gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
+					LOCK_REC_NOT_GAP */
+	que_thr_t*		thr)	/*!< in: query thread */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Checks that a record is seen in a consistent read.
+@return true if sees, or false if an earlier version of the record
+should be retrieved */
+UNIV_INTERN
+bool
+lock_clust_rec_cons_read_sees(
+/*==========================*/
+	const rec_t*	rec,	/*!< in: user record which should be read or
+				passed over by a read cursor */
+	dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
+	read_view_t*	view);	/*!< in: consistent read view */
+/*********************************************************************//**
+Checks that a non-clustered index record is seen in a consistent read.
+
+NOTE that a non-clustered index page contains so little information on
+its modifications that even when false is returned, the present version
+of rec may be the right one, but we must check this from the clustered
+index record.
+
+@return true if certainly sees, or false if an earlier version of the
+clustered index record might be needed */
+UNIV_INTERN
+bool
+lock_sec_rec_cons_read_sees(
+/*========================*/
+	const rec_t*		rec,	/*!< in: user record which
+					should be read or passed over
+					by a read cursor */
+	const read_view_t*	view)	/*!< in: consistent read view */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Locks the specified database table in the mode given. If the lock cannot
+be granted immediately, the query thread is put to wait.
+@return	DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+UNIV_INTERN
+dberr_t
+lock_table(
+/*=======*/
+	ulint		flags,	/*!< in: if BTR_NO_LOCKING_FLAG bit is set,
+				does nothing */
+	dict_table_t*	table,	/*!< in/out: database table
+				in dictionary cache */
+	enum lock_mode	mode,	/*!< in: lock mode */
+	que_thr_t*	thr)	/*!< in: query thread */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Creates a table IX lock object for a resurrected transaction. */
+UNIV_INTERN
+void
+lock_table_ix_resurrect(
+/*====================*/
+	dict_table_t*	table,	/*!< in/out: table */
+	trx_t*		trx);	/*!< in/out: transaction */
+/*************************************************************//**
+Removes a granted record lock of a transaction from the queue and grants
+locks to other transactions waiting in the queue if they now are entitled
+to a lock. */
+UNIV_INTERN
+void
+lock_rec_unlock(
+/*============*/
+	trx_t*			trx,	/*!< in/out: transaction that has
+					set a record lock */
+	const buf_block_t*	block,	/*!< in: buffer block containing rec */
+	const rec_t*		rec,	/*!< in: record */
+	enum lock_mode		lock_mode);/*!< in: LOCK_S or LOCK_X */
+/*********************************************************************//**
+Releases a transaction's locks, and releases possible other transactions
+waiting because of these locks. Change the state of the transaction to
+TRX_STATE_COMMITTED_IN_MEMORY. */
+UNIV_INTERN
+void
+lock_trx_release_locks(
+/*===================*/
+	trx_t*	trx);	/*!< in/out: transaction */
+/*********************************************************************//**
+Removes locks on a table to be dropped or truncated.
+If remove_also_table_sx_locks is TRUE then table-level S and X locks are
+also removed in addition to other table-level and record-level locks.
+No lock, that is going to be removed, is allowed to be a wait lock. */
+UNIV_INTERN
+void
+lock_remove_all_on_table(
+/*=====================*/
+	dict_table_t*	table,			/*!< in: table to be dropped
+						or truncated */
+	ibool		remove_also_table_sx_locks);/*!< in: also removes
+						table S and X locks */
+
+/*********************************************************************//**
+Calculates the fold value of a page file address: used in inserting or
+searching for a lock in the hash table.
+@return	folded value */
+UNIV_INLINE
+ulint
+lock_rec_fold(
+/*==========*/
+	ulint	space,	/*!< in: space */
+	ulint	page_no)/*!< in: page number */
+	__attribute__((const));
+/*********************************************************************//**
+Calculates the hash value of a page file address: used in inserting or
+searching for a lock in the hash table.
+@return	hashed value */
+UNIV_INLINE
+ulint
+lock_rec_hash(
+/*==========*/
+	ulint	space,	/*!< in: space */
+	ulint	page_no);/*!< in: page number */
+
+/**********************************************************************//**
+Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED,
+if none found.
+@return bit index == heap number of the record, or ULINT_UNDEFINED if
+none found */
+UNIV_INTERN
+ulint
+lock_rec_find_set_bit(
+/*==================*/
+	const lock_t*	lock);	/*!< in: record lock with at least one
+				bit set */
+
+/*********************************************************************//**
+Gets the source table of an ALTER TABLE transaction.  The table must be
+covered by an IX or IS table lock.
+@return the source table of transaction, if it is covered by an IX or
+IS table lock; dest if there is no source table, and NULL if the
+transaction is locking more than two tables or an inconsistency is
+found */
+UNIV_INTERN
+dict_table_t*
+lock_get_src_table(
+/*===============*/
+	trx_t*		trx,	/*!< in: transaction */
+	dict_table_t*	dest,	/*!< in: destination of ALTER TABLE */
+	enum lock_mode*	mode);	/*!< out: lock mode of the source table */
+/*********************************************************************//**
+Determine if the given table is exclusively "owned" by the given
+transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC
+on the table.
+@return TRUE if table is only locked by trx, with LOCK_IX, and
+possibly LOCK_AUTO_INC */
+UNIV_INTERN
+ibool
+lock_is_table_exclusive(
+/*====================*/
+	const dict_table_t*	table,	/*!< in: table */
+	const trx_t*		trx)	/*!< in: transaction */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Checks if a lock request lock1 has to wait for request lock2.
+@return	TRUE if lock1 has to wait for lock2 to be removed */
+UNIV_INTERN
+ibool
+lock_has_to_wait(
+/*=============*/
+	const lock_t*	lock1,	/*!< in: waiting lock */
+	const lock_t*	lock2);	/*!< in: another lock; NOTE that it is
+				assumed that this has a lock bit set
+				on the same record as in lock1 if the
+				locks are record locks */
+/*********************************************************************//**
+Reports that a transaction id is not sensible, i.e., in the future. */
+UNIV_INTERN
+void
+lock_report_trx_id_insanity(
+/*========================*/
+	trx_id_t	trx_id,		/*!< in: trx id */
+	const rec_t*	rec,		/*!< in: user record */
+	dict_index_t*	index,		/*!< in: index */
+	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index) */
+	trx_id_t	max_trx_id)	/*!< in: trx_sys_get_max_trx_id() */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Prints info of a table lock. */
+UNIV_INTERN
+void
+lock_table_print(
+/*=============*/
+	FILE*		file,	/*!< in: file where to print */
+	const lock_t*	lock);	/*!< in: table type lock */
+/*********************************************************************//**
+Prints info of a record lock. */
+UNIV_INTERN
+void
+lock_rec_print(
+/*===========*/
+	FILE*		file,	/*!< in: file where to print */
+	const lock_t*	lock);	/*!< in: record type lock */
+/*********************************************************************//**
+Prints info of locks for all transactions.
+@return FALSE if not able to obtain lock mutex and exits without
+printing info */
+UNIV_INTERN
+ibool
+lock_print_info_summary(
+/*====================*/
+	FILE*	file,	/*!< in: file where to print */
+	ibool   nowait)	/*!< in: whether to wait for the lock mutex */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Prints info of locks for each transaction. This function assumes that the
+caller holds the lock mutex and more importantly it will release the lock
+mutex on behalf of the caller. (This should be fixed in the future). */
+UNIV_INTERN
+void
+lock_print_info_all_transactions(
+/*=============================*/
+	FILE*	file);	/*!< in: file where to print */
+/*********************************************************************//**
+Return approximate number of record locks (bits set in the bitmap) for
+this transaction. Since delete-marked records may be removed, the
+record count will not be precise.
+The caller must be holding lock_sys->mutex. */
+UNIV_INTERN
+ulint
+lock_number_of_rows_locked(
+/*=======================*/
+	const trx_lock_t*	trx_lock)	/*!< in: transaction locks */
+	__attribute__((nonnull, warn_unused_result));
+
+/*******************************************************************//**
+Gets the type of a lock. Non-inline version for using outside of the
+lock module.
+@return	LOCK_TABLE or LOCK_REC */
+UNIV_INTERN
+ulint
+lock_get_type(
+/*==========*/
+	const lock_t*	lock);	/*!< in: lock */
+
+/*******************************************************************//**
+Gets the id of the transaction owning a lock.
+@return	transaction id */
+UNIV_INTERN
+trx_id_t
+lock_get_trx_id(
+/*============*/
+	const lock_t*	lock);	/*!< in: lock */
+
+/*******************************************************************//**
+Gets the mode of a lock in a human readable string.
+The string should not be free()'d or modified.
+@return	lock mode */
+UNIV_INTERN
+const char*
+lock_get_mode_str(
+/*==============*/
+	const lock_t*	lock);	/*!< in: lock */
+
+/*******************************************************************//**
+Gets the type of a lock in a human readable string.
+The string should not be free()'d or modified.
+@return	lock type */
+UNIV_INTERN
+const char*
+lock_get_type_str(
+/*==============*/
+	const lock_t*	lock);	/*!< in: lock */
+
+/*******************************************************************//**
+Gets the id of the table on which the lock is.
+@return	id of the table */
+UNIV_INTERN
+table_id_t
+lock_get_table_id(
+/*==============*/
+	const lock_t*	lock);	/*!< in: lock */
+
+/*******************************************************************//**
+Gets the name of the table on which the lock is.
+The string should not be free()'d or modified.
+@return	name of the table */
+UNIV_INTERN
+const char*
+lock_get_table_name(
+/*================*/
+	const lock_t*	lock);	/*!< in: lock */
+
+/*******************************************************************//**
+For a record lock, gets the index on which the lock is.
+@return	index */
+UNIV_INTERN
+const dict_index_t*
+lock_rec_get_index(
+/*===============*/
+	const lock_t*	lock);	/*!< in: lock */
+
+/*******************************************************************//**
+For a record lock, gets the name of the index on which the lock is.
+The string should not be free()'d or modified.
+@return	name of the index */
+UNIV_INTERN
+const char*
+lock_rec_get_index_name(
+/*====================*/
+	const lock_t*	lock);	/*!< in: lock */
+
+/*******************************************************************//**
+For a record lock, gets the tablespace number on which the lock is.
+@return	tablespace number */
+UNIV_INTERN
+ulint
+lock_rec_get_space_id(
+/*==================*/
+	const lock_t*	lock);	/*!< in: lock */
+
+/*******************************************************************//**
+For a record lock, gets the page number on which the lock is.
+@return	page number */
+UNIV_INTERN
+ulint
+lock_rec_get_page_no(
+/*=================*/
+	const lock_t*	lock);	/*!< in: lock */
+/*******************************************************************//**
+Check if there are any locks (table or rec) against table.
+@return	TRUE if locks exist */
+UNIV_INTERN
+ibool
+lock_table_has_locks(
+/*=================*/
+	const dict_table_t*	table);	/*!< in: check if there are any locks
+					held on records in this table or on the
+					table itself */
+
+/*********************************************************************//**
+A thread which wakes up threads whose lock wait may have lasted too long.
+@return	a dummy parameter */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(lock_wait_timeout_thread)(
+/*=====================================*/
+	void*	arg);	/*!< in: a dummy parameter required by
+			os_thread_create */
+
+/********************************************************************//**
+Releases a user OS thread waiting for a lock to be released, if the
+thread is already suspended. */
+UNIV_INTERN
+void
+lock_wait_release_thread_if_suspended(
+/*==================================*/
+	que_thr_t*	thr);	/*!< in: query thread associated with the
+				user OS thread	 */
+
+/***************************************************************//**
+Puts a user OS thread to wait for a lock to be released. If an error
+occurs during the wait trx->error_state associated with thr is
+!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
+are possible errors. DB_DEADLOCK is returned if selective deadlock
+resolution chose this transaction as a victim. */
+UNIV_INTERN
+void
+lock_wait_suspend_thread(
+/*=====================*/
+	que_thr_t*	thr);	/*!< in: query thread associated with the
+				user OS thread */
+/*********************************************************************//**
+Unlocks AUTO_INC type locks that were possibly reserved by a trx. This
+function should be called at the end of an SQL statement, by the
+connection thread that owns the transaction (trx->mysql_thd). */
+UNIV_INTERN
+void
+lock_unlock_table_autoinc(
+/*======================*/
+	trx_t*	trx);			/*!< in/out: transaction */
+/*********************************************************************//**
+Check whether the transaction has already been rolled back because it
+was selected as a deadlock victim, or if it has to wait then cancel
+the wait lock.
+@return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+lock_trx_handle_wait(
+/*=================*/
+	trx_t*	trx)	/*!< in/out: trx lock state */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Get the number of locks on a table.
+@return number of locks */
+UNIV_INTERN
+ulint
+lock_table_get_n_locks(
+/*===================*/
+	const dict_table_t*	table)	/*!< in: table */
+	__attribute__((nonnull));
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Checks that a transaction id is sensible, i.e., not in the future.
+@return	true if ok */
+UNIV_INTERN
+bool
+lock_check_trx_id_sanity(
+/*=====================*/
+	trx_id_t	trx_id,		/*!< in: trx id */
+	const rec_t*	rec,		/*!< in: user record */
+	dict_index_t*	index,		/*!< in: index */
+	const ulint*	offsets)	/*!< in: rec_get_offsets(rec, index) */
+	__attribute__((nonnull, warn_unused_result));
+/*******************************************************************//**
+Check if the transaction holds any locks on the sys tables
+or its records.
+@return	the strongest lock found on any sys table or 0 for none */
+UNIV_INTERN
+const lock_t*
+lock_trx_has_sys_table_locks(
+/*=========================*/
+	const trx_t*	trx)	/*!< in: transaction to check */
+	__attribute__((warn_unused_result));
+
+/*******************************************************************//**
+Check if the transaction holds an exclusive lock on a record.
+@return	whether the locks are held */
+UNIV_INTERN
+bool
+lock_trx_has_rec_x_lock(
+/*====================*/
+	const trx_t*		trx,	/*!< in: transaction to check */
+	const dict_table_t*	table,	/*!< in: table to check */
+	const buf_block_t*	block,	/*!< in: buffer block of the record */
+	ulint			heap_no)/*!< in: record heap number */
+	__attribute__((nonnull, warn_unused_result));
+#endif /* UNIV_DEBUG */
+
+/** Lock modes and types */
+/* @{ */
+#define LOCK_MODE_MASK	0xFUL	/*!< mask used to extract mode from the
+				type_mode field in a lock */
+/** Lock types */
+/* @{ */
+#define LOCK_TABLE	16	/*!< table lock */
+#define	LOCK_REC	32	/*!< record lock */
+#define LOCK_TYPE_MASK	0xF0UL	/*!< mask used to extract lock type from the
+				type_mode field in a lock */
+#if LOCK_MODE_MASK & LOCK_TYPE_MASK
+# error "LOCK_MODE_MASK & LOCK_TYPE_MASK"
+#endif
+
+#define LOCK_WAIT	256	/*!< Waiting lock flag; when set, it
+				means that the lock has not yet been
+				granted, it is just waiting for its
+				turn in the wait queue */
+/* Precise modes */
+#define LOCK_ORDINARY	0	/*!< this flag denotes an ordinary
+				next-key lock in contrast to LOCK_GAP
+				or LOCK_REC_NOT_GAP */
+#define LOCK_GAP	512	/*!< when this bit is set, it means that the
+				lock holds only on the gap before the record;
+				for instance, an x-lock on the gap does not
+				give permission to modify the record on which
+				the bit is set; locks of this type are created
+				when records are removed from the index chain
+				of records */
+#define LOCK_REC_NOT_GAP 1024	/*!< this bit means that the lock is only on
+				the index record and does NOT block inserts
+				to the gap before the index record; this is
+				used in the case when we retrieve a record
+				with a unique key, and is also used in
+				locking plain SELECTs (not part of UPDATE
+				or DELETE) when the user has set the READ
+				COMMITTED isolation level */
+#define LOCK_INSERT_INTENTION 2048 /*!< this bit is set when we place a waiting
+				gap type record lock request in order to let
+				an insert of an index record to wait until
+				there are no conflicting locks by other
+				transactions on the gap; note that this flag
+				remains set when the waiting lock is granted,
+				or if the lock is inherited to a neighboring
+				record */
+
+#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION)&LOCK_MODE_MASK
+# error
+#endif
+#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION)&LOCK_TYPE_MASK
+# error
+#endif
+/* @} */
+
+/** Lock operation struct: a table paired with the mode to lock it in */
+struct lock_op_t{
+	dict_table_t*	table;	/*!< table to be locked */
+	enum lock_mode	mode;	/*!< lock mode (see enum lock_mode) */
+};
+
+/** The lock system struct; accessed through the global lock_sys pointer */
+struct lock_sys_t{
+	ib_mutex_t	mutex;			/*!< Mutex protecting the
+						locks */
+	hash_table_t*	rec_hash;		/*!< hash table of the record
+						locks */
+	ib_mutex_t	wait_mutex;		/*!< Mutex protecting
+						waiting_threads and last_slot */
+	srv_slot_t*	waiting_threads;	/*!< Array of user threads
+						suspended while waiting for
+						locks within InnoDB, protected
+						by the lock_sys->wait_mutex */
+	srv_slot_t*	last_slot;		/*!< highest slot ever used
+						in the waiting_threads array,
+						protected by
+						lock_sys->wait_mutex */
+	ibool		rollback_complete;
+						/*!< TRUE if rollback of all
+						recovered transactions is
+						complete. Protected by
+						lock_sys->mutex */
+
+	ulint		n_lock_max_wait_time;	/*!< Max wait time */
+
+	os_event_t	timeout_event;		/*!< Set to the event that is
+						created in the lock wait monitor
+						thread. A value of 0 means the
+						thread is not active */
+
+	bool		timeout_thread_active;	/*!< True if the timeout thread
+						is running */
+};
+
+/** The lock system */
+extern lock_sys_t*	lock_sys;
+
+/** Try to acquire lock_sys->mutex without waiting (mutex_enter_nowait). */
+#define lock_mutex_enter_nowait() mutex_enter_nowait(&lock_sys->mutex)
+
+/** Test if lock_sys->mutex is owned. */
+#define lock_mutex_own() mutex_own(&lock_sys->mutex)
+
+/** Acquire the lock_sys->mutex. */
+#define lock_mutex_enter() do {			\
+	mutex_enter(&lock_sys->mutex);		\
+} while (0)
+
+/** Release the lock_sys->mutex. */
+#define lock_mutex_exit() do {			\
+	mutex_exit(&lock_sys->mutex);		\
+} while (0)
+
+/** Test if lock_sys->wait_mutex is owned. */
+#define lock_wait_mutex_own() mutex_own(&lock_sys->wait_mutex)
+
+/** Acquire the lock_sys->wait_mutex. */
+#define lock_wait_mutex_enter() do {		\
+	mutex_enter(&lock_sys->wait_mutex);	\
+} while (0)
+
+/** Release the lock_sys->wait_mutex. */
+#define lock_wait_mutex_exit() do {		\
+	mutex_exit(&lock_sys->wait_mutex);	\
+} while (0)
+
+#ifndef UNIV_NONINL
+#include "lock0lock.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/lock0lock.ic b/storage/innobase/include/lock0lock.ic
new file mode 100644
index 00000000000..736936954cb
--- /dev/null
+++ b/storage/innobase/include/lock0lock.ic
@@ -0,0 +1,92 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/lock0lock.ic
+The transaction lock system
+
+Created 5/7/1996 Heikki Tuuri
+*******************************************************/
+
+#include "sync0sync.h"
+#include "srv0srv.h"
+#include "dict0dict.h"
+#include "row0row.h"
+#include "trx0sys.h"
+#include "trx0trx.h"
+#include "buf0buf.h"
+#include "page0page.h"
+#include "page0cur.h"
+#include "row0vers.h"
+#include "que0que.h"
+#include "btr0cur.h"
+#include "read0read.h"
+#include "log0recv.h"
+
+/*********************************************************************//**
+Folds a page file address; used to insert or find a lock in the hash table.
+@return	folded value */
+UNIV_INLINE
+ulint
+lock_rec_fold(
+/*==========*/
+	ulint	space,	/*!< in: space */
+	ulint	page_no)/*!< in: page number */
+{
+	const ulint	fold = ut_fold_ulint_pair(space, page_no);
+	return(fold);
+}
+
+/*********************************************************************//**
+Maps a page file address to its hash value in lock_sys->rec_hash, the
+hash table of record locks; used when inserting or searching for a lock.
+@return	hashed value */
+UNIV_INLINE
+ulint
+lock_rec_hash(
+/*==========*/
+	ulint	space,	/*!< in: space */
+	ulint	page_no)/*!< in: page number */
+{
+	const ulint	fold = lock_rec_fold(space, page_no);
+	return(hash_calc_hash(fold, lock_sys->rec_hash));
+}
+
+/*********************************************************************//**
+Gets the heap_no of the smallest user record on a page, found by following
+the next-record offset stored in the page infimum.
+@return	heap_no of smallest user record, or PAGE_HEAP_NO_SUPREMUM */
+UNIV_INLINE
+ulint
+lock_get_min_heap_no(
+/*=================*/
+	const buf_block_t*	block)	/*!< in: buffer block */
+{
+	const page_t*	frame	= block->frame;
+	ulint		next_offs;
+
+	if (!page_is_comp(frame)) {
+		/* page_is_comp() is false: old-style infimum and accessor */
+		next_offs = rec_get_next_offs(frame + PAGE_OLD_INFIMUM, FALSE);
+		return(rec_get_heap_no_old(frame + next_offs));
+	}
+
+	/* page_is_comp() is true: new-style infimum and accessor */
+	next_offs = rec_get_next_offs(frame + PAGE_NEW_INFIMUM, TRUE);
+	return(rec_get_heap_no_new(frame + next_offs));
+}
diff --git a/storage/innobase/include/lock0priv.h b/storage/innobase/include/lock0priv.h
new file mode 100644
index 00000000000..9f7ab9f76b6
--- /dev/null
+++ b/storage/innobase/include/lock0priv.h
@@ -0,0 +1,126 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/lock0priv.h
+Lock module internal structures and methods.
+
+Created July 12, 2007 Vasil Dimov
+*******************************************************/
+
+#ifndef lock0priv_h
+#define lock0priv_h
+
+#ifndef LOCK_MODULE_IMPLEMENTATION
+/* If you need to access members of the structures defined in this
+file, please write appropriate functions that retrieve them and put
+those functions in lock/ */
+#error Do not include lock0priv.h outside of the lock/ module
+#endif
+
+#include "univ.i"
+#include "dict0types.h"
+#include "hash0hash.h"
+#include "trx0types.h"
+#include "ut0lst.h"
+
+/** A table lock: the table-specific part of a lock_t (lock_t::un_member) */
+struct lock_table_t {
+	dict_table_t*	table;		/*!< database table in dictionary
+					cache */
+	UT_LIST_NODE_T(lock_t)
+			locks;		/*!< node in the list of locks on
+					the same table */
+};
+
+/** Record lock for a page: the record-specific part of a lock_t */
+struct lock_rec_t {
+	ulint	space;			/*!< space id */
+	ulint	page_no;		/*!< page number */
+	ulint	n_bits;			/*!< number of bits in the lock
+					bitmap; NOTE: the lock bitmap is
+					placed immediately after the
+					lock struct */
+};
+
+/** Lock struct; protected by lock_sys->mutex */
+struct lock_t {
+	trx_t*		trx;		/*!< transaction owning the
+					lock */
+	UT_LIST_NODE_T(lock_t)
+			trx_locks;	/*!< node in the list of the locks
+					of the transaction */
+	ulint		type_mode;	/*!< LOCK_TYPE_MASK bits (type),
+					LOCK_MODE_MASK bits (mode), LOCK_WAIT,
+					LOCK_GAP, LOCK_REC_NOT_GAP and
+					LOCK_INSERT_INTENTION flags, ORed */
+	hash_node_t	hash;		/*!< hash chain node for a record
+					lock */
+	dict_index_t*	index;		/*!< index for a record lock */
+	union {
+		lock_table_t	tab_lock;/*!< table lock */
+		lock_rec_t	rec_lock;/*!< record lock */
+	} un_member;			/*!< lock details */
+};
+
+/*********************************************************************//**
+Gets the type of a lock.
+@return	LOCK_TABLE or LOCK_REC */
+UNIV_INLINE
+ulint
+lock_get_type_low(
+/*==============*/
+	const lock_t*	lock);	/*!< in: lock */
+
+/*********************************************************************//**
+Gets the previous record lock set on a record.
+@return	previous lock on the same record, NULL if none exists */
+UNIV_INTERN
+const lock_t*
+lock_rec_get_prev(
+/*==============*/
+	const lock_t*	in_lock,/*!< in: record lock */
+	ulint		heap_no);/*!< in: heap number of the record */
+
+/*********************************************************************//**
+Cancels a waiting lock request and releases possible other transactions
+waiting behind it. */
+UNIV_INTERN
+void
+lock_cancel_waiting_and_release(
+/*============================*/
+	lock_t*	lock);	/*!< in/out: waiting lock request */
+
+/*********************************************************************//**
+Checks if some transaction has an implicit x-lock on a record in a clustered
+index.
+@return	transaction id of the transaction which has the x-lock, or 0 */
+UNIV_INLINE
+trx_id_t
+lock_clust_rec_some_has_impl(
+/*=========================*/
+	const rec_t*		rec,	/*!< in: user record */
+	const dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*		offsets)/*!< in: rec_get_offsets(rec, index) */
+	__attribute__((nonnull, warn_unused_result));
+
+#ifndef UNIV_NONINL
+#include "lock0priv.ic"
+#endif
+
+#endif /* lock0priv_h */
diff --git a/storage/innobase/include/lock0priv.ic b/storage/innobase/include/lock0priv.ic
new file mode 100644
index 00000000000..6b70dc33d3c
--- /dev/null
+++ b/storage/innobase/include/lock0priv.ic
@@ -0,0 +1,67 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/lock0priv.ic
+Lock module internal inline methods.
+
+Created July 16, 2007 Vasil Dimov
+*******************************************************/
+
+/* This file contains only methods which are used in
+lock/lock0* files, other than lock/lock0lock.cc.
+I.e. lock/lock0lock.cc contains more internal inline
+methods but they are used only in that file. */
+
+#ifndef LOCK_MODULE_IMPLEMENTATION
+#error Do not include lock0priv.ic outside of the lock/ module
+#endif
+
+/*********************************************************************//**
+Gets the type of a lock: the LOCK_TYPE_MASK bits of lock->type_mode.
+@return	LOCK_TABLE or LOCK_REC */
+UNIV_INLINE
+ulint
+lock_get_type_low(
+/*==============*/
+	const lock_t*	lock)	/*!< in: lock */
+{
+	ut_ad(lock);
+	const ulint	type = lock->type_mode & LOCK_TYPE_MASK;
+	return(type);
+}
+
+/*********************************************************************//**
+Reads the transaction id stored in a clustered index record; this is how
+the lock module checks for an implicit x-lock on the record.
+@return	transaction id of the transaction which has the x-lock, or 0 */
+UNIV_INLINE
+trx_id_t
+lock_clust_rec_some_has_impl(
+/*=========================*/
+	const rec_t*		rec,	/*!< in: user record */
+	const dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*		offsets)/*!< in: rec_get_offsets(rec, index) */
+{
+	ut_ad(dict_index_is_clust(index));
+	ut_ad(page_rec_is_user_rec(rec));
+	const trx_id_t	trx_id = row_get_rec_trx_id(rec, index, offsets);
+	return(trx_id);
+}
+
+/* vim: set filetype=c: */
diff --git a/storage/innobase/include/lock0types.h b/storage/innobase/include/lock0types.h
new file mode 100644
index 00000000000..cf32e72f864
--- /dev/null
+++ b/storage/innobase/include/lock0types.h
@@ -0,0 +1,47 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/lock0types.h
+The transaction lock system global types
+
+Created 5/7/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef lock0types_h
+#define lock0types_h
+
+#define lock_t ib_lock_t
+struct lock_t;
+struct lock_sys_t;
+
+/* Basic lock modes; kept in the LOCK_MODE_MASK bits of lock_t::type_mode */
+enum lock_mode {
+	LOCK_IS = 0,	/* intention shared */
+	LOCK_IX,	/* intention exclusive (1) */
+	LOCK_S,		/* shared (2) */
+	LOCK_X,		/* exclusive (3) */
+	LOCK_AUTO_INC,	/* locks the auto-inc counter of a table
+			in an exclusive mode (4) */
+	LOCK_NONE,	/* this is used elsewhere to note consistent read (5) */
+	LOCK_NUM = LOCK_NONE, /* number of lock modes; LOCK_NONE not counted */
+	LOCK_NONE_UNSET = 255	/* NOTE(review): undocumented; confirm use */
+};
+
+
+#endif
diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h
new file mode 100644
index 00000000000..1318b62c242
--- /dev/null
+++ b/storage/innobase/include/log0log.h
@@ -0,0 +1,999 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All rights reserved.
+Copyright (c) 2009, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/log0log.h
+Database log
+
+Created 12/9/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef log0log_h
+#define log0log_h
+
+#include "univ.i"
+#include "ut0byte.h"
+#include "ut0lst.h"
+#ifndef UNIV_HOTBACKUP
+#include "sync0sync.h"
+#include "sync0rw.h"
+#endif /* !UNIV_HOTBACKUP */
+
+/* Type used for all log sequence number storage and arithmetics */
+typedef	ib_uint64_t		lsn_t;
+#define LSN_MAX			IB_UINT64_MAX
+
+#define LSN_PF			UINT64PF
+
+/** Redo log buffer */
+struct log_t;
+/** Redo log group */
+struct log_group_t;
+
+#ifdef UNIV_DEBUG
+/** Flag: write to log file? */
+extern	ibool	log_do_write;
+/** Flag: enable debug output when writing to the log? */
+extern	ibool	log_debug_writes;
+#else /* UNIV_DEBUG */
+/** Write to log */
+# define log_do_write TRUE
+#endif /* UNIV_DEBUG */
+
+/** Wait modes for log_write_up_to @{ */
+#define LOG_NO_WAIT		91
+#define LOG_WAIT_ONE_GROUP	92
+#define	LOG_WAIT_ALL_GROUPS	93
+/* @} */
+/** Maximum number of log groups in log_group_t::checkpoint_buf */
+#define LOG_MAX_N_GROUPS	32
+
+/*******************************************************************//**
+Calculates where in log files we find a specified lsn.
+@return	log file number */
+UNIV_INTERN
+ulint
+log_calc_where_lsn_is(
+/*==================*/
+	ib_int64_t*	log_file_offset,	/*!< out: offset in that file
+						(including the header) */
+	ib_uint64_t	first_header_lsn,	/*!< in: first log file start
+						lsn */
+	ib_uint64_t	lsn,			/*!< in: lsn whose position to
+						determine */
+	ulint		n_log_files,		/*!< in: total number of log
+						files */
+	ib_int64_t	log_file_size);		/*!< in: log file size
+						(including the header) */
+#ifndef UNIV_HOTBACKUP
+/************************************************************//**
+Writes to the log the string given. The log must be released with
+log_release.
+@return	end lsn of the log record, zero if did not succeed */
+UNIV_INLINE
+lsn_t
+log_reserve_and_write_fast(
+/*=======================*/
+	const void*	str,	/*!< in: string */
+	ulint		len,	/*!< in: string length */
+	lsn_t*		start_lsn);/*!< out: start lsn of the log record */
+/***********************************************************************//**
+Releases the log mutex. */
+UNIV_INLINE
+void
+log_release(void);
+/*=============*/
+/***********************************************************************//**
+Checks if there is need for a log buffer flush or a new checkpoint, and does
+this if yes. Any database operation should call this when it has modified
+more than about 4 pages. NOTE that this function may only be called when the
+OS thread owns no synchronization objects except the dictionary mutex. */
+UNIV_INLINE
+void
+log_free_check(void);
+/*================*/
+/************************************************************//**
+Opens the log for log_write_low. The log must be closed with log_close and
+released with log_release.
+@return	start lsn of the log record */
+UNIV_INTERN
+lsn_t
+log_reserve_and_open(
+/*=================*/
+	ulint	len);	/*!< in: length of data to be catenated */
+/************************************************************//**
+Writes to the log the string given. It is assumed that the caller holds the
+log mutex. */
+UNIV_INTERN
+void
+log_write_low(
+/*==========*/
+	byte*	str,		/*!< in: string */
+	ulint	str_len);	/*!< in: string length */
+/************************************************************//**
+Closes the log.
+@return	lsn */
+UNIV_INTERN
+lsn_t
+log_close(void);
+/*===========*/
+/************************************************************//**
+Gets the current lsn.
+@return	current lsn */
+UNIV_INLINE
+lsn_t
+log_get_lsn(void);
+/*=============*/
+/****************************************************************//**
+Gets the log group capacity. It is OK to read the value without
+holding log_sys->mutex because it is constant.
+@return	log group capacity */
+UNIV_INLINE
+lsn_t
+log_get_capacity(void);
+/*==================*/
+/****************************************************************//**
+Get log_sys::max_modified_age_async. It is OK to read the value without
+holding log_sys::mutex because it is constant.
+@return	max_modified_age_async */
+UNIV_INLINE
+lsn_t
+log_get_max_modified_age_async(void);
+/*================================*/
+/******************************************************//**
+Initializes the log. */
+UNIV_INTERN
+void
+log_init(void);
+/*==========*/
+/******************************************************************//**
+Inits a log group to the log system. */
+UNIV_INTERN
+void
+log_group_init(
+/*===========*/
+	ulint	id,			/*!< in: group id */
+	ulint	n_files,		/*!< in: number of log files */
+	lsn_t	file_size,		/*!< in: log file size in bytes */
+	ulint	space_id,		/*!< in: space id of the file space
+					which contains the log files of this
+					group */
+	ulint	archive_space_id);	/*!< in: space id of the file space
+					which contains some archived log
+					files for this group; currently, only
+					for the first log group this is
+					used */
+/******************************************************//**
+Completes an i/o to a log file. */
+UNIV_INTERN
+void
+log_io_complete(
+/*============*/
+	log_group_t*	group);	/*!< in: log group */
+/******************************************************//**
+This function is called, e.g., when a transaction wants to commit. It checks
+that the log has been written to the log file up to the last log entry written
+by the transaction. If there is a flush running, it waits and checks if the
+flush flushed enough. If not, starts a new flush. */
+UNIV_INTERN
+void
+log_write_up_to(
+/*============*/
+	lsn_t	lsn,	/*!< in: log sequence number up to which
+			the log should be written, LSN_MAX if not specified */
+	ulint	wait,	/*!< in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
+			or LOG_WAIT_ALL_GROUPS */
+	ibool	flush_to_disk);
+			/*!< in: TRUE if we want the written log
+			also to be flushed to disk */
+/****************************************************************//**
+Does a synchronous flush of the log buffer to disk. */
+UNIV_INTERN
+void
+log_buffer_flush_to_disk(void);
+/*==========================*/
+/****************************************************************//**
+This function writes the log buffer to the log file and if 'flush'
+is set it forces a flush of the log file as well. This is meant to be
+called from background master thread only as it does not wait for
+the write (+ possible flush) to finish. */
+UNIV_INTERN
+void
+log_buffer_sync_in_background(
+/*==========================*/
+	ibool	flush);	/*!< in: flush the logs to disk */
+/******************************************************//**
+Makes a checkpoint. Note that this function does not flush dirty
+blocks from the buffer pool: it only checks what is lsn of the oldest
+modification in the pool, and writes information about the lsn in
+log files. Use log_make_checkpoint_at to flush also the pool.
+@return	TRUE if success, FALSE if a checkpoint write was already running */
+UNIV_INTERN
+ibool
+log_checkpoint(
+/*===========*/
+	ibool	sync,		/*!< in: TRUE if synchronous operation is
+				desired */
+	ibool	write_always);	/*!< in: the function normally checks if the
+				new checkpoint would have a greater
+				lsn than the previous one: if not, then no
+				physical write is done; by setting this
+				parameter TRUE, a physical write will always be
+				made to log files */
+/****************************************************************//**
+Makes a checkpoint at a given lsn or later. */
+UNIV_INTERN
+void
+log_make_checkpoint_at(
+/*===================*/
+	lsn_t	lsn,		/*!< in: make a checkpoint at this or a
+				later lsn, if LSN_MAX, makes
+				a checkpoint at the latest lsn */
+	ibool	write_always);	/*!< in: the function normally checks if
+				the new checkpoint would have a
+				greater lsn than the previous one: if
+				not, then no physical write is done;
+				by setting this parameter TRUE, a
+				physical write will always be made to
+				log files */
+/****************************************************************//**
+Makes a checkpoint at the latest lsn and writes it to first page of each
+data file in the database, so that we know that the file spaces contain
+all modifications up to that lsn. This can only be called at database
+shutdown. This function also writes all log in log files to the log archive. */
+UNIV_INTERN
+void
+logs_empty_and_mark_files_at_shutdown(void);
+/*=======================================*/
+/******************************************************//**
+Reads a checkpoint info from a log group header to log_sys->checkpoint_buf. */
+UNIV_INTERN
+void
+log_group_read_checkpoint_info(
+/*===========================*/
+	log_group_t*	group,	/*!< in: log group */
+	ulint		field);	/*!< in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
+/*******************************************************************//**
+Gets info from a checkpoint about a log group. */
+UNIV_INTERN
+void
+log_checkpoint_get_nth_group_info(
+/*==============================*/
+	const byte*	buf,	/*!< in: buffer containing checkpoint info */
+	ulint		n,	/*!< in: nth slot */
+	ulint*		file_no,/*!< out: archived file number */
+	ulint*		offset);/*!< out: archived file offset */
+/******************************************************//**
+Writes checkpoint info to groups. */
+UNIV_INTERN
+void
+log_groups_write_checkpoint_info(void);
+/*==================================*/
+/********************************************************************//**
+Starts an archiving operation.
+@return	TRUE if succeed, FALSE if an archiving operation was already running */
+UNIV_INTERN
+ibool
+log_archive_do(
+/*===========*/
+	ibool	sync,	/*!< in: TRUE if synchronous operation is desired */
+	ulint*	n_bytes);/*!< out: archive log buffer size, 0 if nothing to
+			archive */
+/****************************************************************//**
+Writes the log contents to the archive up to the lsn when this function was
+called, and stops the archiving. When archiving is started again, the archived
+log file numbers start from a number one higher, so that the archiving will
+not write again to the archived log files which exist when this function
+returns.
+@return	DB_SUCCESS or DB_ERROR */
+UNIV_INTERN
+ulint
+log_archive_stop(void);
+/*==================*/
+/****************************************************************//**
+Starts again archiving which has been stopped.
+@return	DB_SUCCESS or DB_ERROR */
+UNIV_INTERN
+ulint
+log_archive_start(void);
+/*===================*/
+/****************************************************************//**
+Stop archiving the log so that a gap may occur in the archived log files.
+@return	DB_SUCCESS or DB_ERROR */
+UNIV_INTERN
+ulint
+log_archive_noarchivelog(void);
+/*==========================*/
+/****************************************************************//**
+Start archiving the log so that a gap may occur in the archived log files.
+@return	DB_SUCCESS or DB_ERROR */
+UNIV_INTERN
+ulint
+log_archive_archivelog(void);
+/*========================*/
+/******************************************************//**
+Generates an archived log file name. */
+UNIV_INTERN
+void
+log_archived_file_name_gen(
+/*=======================*/
+	char*	buf,	/*!< in: buffer where to write */
+	ulint	id,	/*!< in: group id */
+	ulint	file_no);/*!< in: file number */
+#else /* !UNIV_HOTBACKUP */
+/******************************************************//**
+Writes info to a buffer of a log group when log files are created in
+backup restoration. */
+UNIV_INTERN
+void
+log_reset_first_header_and_checkpoint(
+/*==================================*/
+	byte*		hdr_buf,/*!< in: buffer which will be written to the
+				start of the first log file */
+	ib_uint64_t	start);	/*!< in: lsn of the start of the first log file;
+				we pretend that there is a checkpoint at
+				start + LOG_BLOCK_HDR_SIZE */
+#endif /* !UNIV_HOTBACKUP */
+/********************************************************************//**
+Checks that there is enough free space in the log to start a new query step.
+Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
+function may only be called if the calling thread owns no synchronization
+objects! */
+UNIV_INTERN
+void
+log_check_margins(void);
+/*===================*/
+#ifndef UNIV_HOTBACKUP
+/******************************************************//**
+Reads a specified log segment to a buffer. */
+UNIV_INTERN
+void
+log_group_read_log_seg(
+/*===================*/
+	ulint		type,		/*!< in: LOG_ARCHIVE or LOG_RECOVER */
+	byte*		buf,		/*!< in: buffer where to read */
+	log_group_t*	group,		/*!< in: log group */
+	lsn_t		start_lsn,	/*!< in: read area start */
+	lsn_t		end_lsn);	/*!< in: read area end */
+/******************************************************//**
+Writes a buffer to a log file group. */
+UNIV_INTERN
+void
+log_group_write_buf(
+/*================*/
+	log_group_t*	group,		/*!< in: log group */
+	byte*		buf,		/*!< in: buffer */
+	ulint		len,		/*!< in: buffer len; must be divisible
+					by OS_FILE_LOG_BLOCK_SIZE */
+	lsn_t		start_lsn,	/*!< in: start lsn of the buffer; must
+					be divisible by
+					OS_FILE_LOG_BLOCK_SIZE */
+	ulint		new_data_offset);/*!< in: start offset of new data in
+					buf: this parameter is used to decide
+					if we have to write a new log file
+					header */
+/********************************************************//**
+Sets the field values in group to correspond to a given lsn. For this function
+to work, the values must already be correctly initialized to correspond to
+some lsn, for instance, a checkpoint lsn. */
+UNIV_INTERN
+void
+log_group_set_fields(
+/*=================*/
+	log_group_t*	group,	/*!< in/out: group */
+	lsn_t		lsn);	/*!< in: lsn for which the values should be
+				set */
+/******************************************************//**
+Calculates the data capacity of a log group, when the log file headers are not
+included.
+@return	capacity in bytes */
+UNIV_INTERN
+lsn_t
+log_group_get_capacity(
+/*===================*/
+	const log_group_t*	group);	/*!< in: log group */
+#endif /* !UNIV_HOTBACKUP */
+/************************************************************//**
+Gets a log block flush bit.
+@return	TRUE if this block was the first to be written in a log flush */
+UNIV_INLINE
+ibool
+log_block_get_flush_bit(
+/*====================*/
+	const byte*	log_block);	/*!< in: log block */
+/************************************************************//**
+Gets a log block number stored in the header.
+@return	log block number stored in the block header */
+UNIV_INLINE
+ulint
+log_block_get_hdr_no(
+/*=================*/
+	const byte*	log_block);	/*!< in: log block */
+/************************************************************//**
+Gets a log block data length.
+@return	log block data length measured as a byte offset from the block start */
+UNIV_INLINE
+ulint
+log_block_get_data_len(
+/*===================*/
+	const byte*	log_block);	/*!< in: log block */
+/************************************************************//**
+Sets the log block data length. */
+UNIV_INLINE
+void
+log_block_set_data_len(
+/*===================*/
+	byte*	log_block,	/*!< in/out: log block */
+	ulint	len);		/*!< in: data length */
+/************************************************************//**
+Calculates the checksum for a log block.
+@return	checksum */
+UNIV_INLINE
+ulint
+log_block_calc_checksum(
+/*====================*/
+	const byte*	block);	/*!< in: log block */
+/************************************************************//**
+Gets a log block checksum field value.
+@return	checksum */
+UNIV_INLINE
+ulint
+log_block_get_checksum(
+/*===================*/
+	const byte*	log_block);	/*!< in: log block */
+/************************************************************//**
+Sets a log block checksum field value. */
+UNIV_INLINE
+void
+log_block_set_checksum(
+/*===================*/
+	byte*	log_block,	/*!< in/out: log block */
+	ulint	checksum);	/*!< in: checksum */
+/************************************************************//**
+Gets a log block first mtr log record group offset.
+@return first mtr log record group byte offset from the block start, 0
+if none */
+UNIV_INLINE
+ulint
+log_block_get_first_rec_group(
+/*==========================*/
+	const byte*	log_block);	/*!< in: log block */
+/************************************************************//**
+Sets the log block first mtr log record group offset. */
+UNIV_INLINE
+void
+log_block_set_first_rec_group(
+/*==========================*/
+	byte*	log_block,	/*!< in/out: log block */
+	ulint	offset);	/*!< in: offset, 0 if none */
+/************************************************************//**
+Gets a log block checkpoint number field (4 lowest bytes).
+@return	checkpoint no (4 lowest bytes) */
+UNIV_INLINE
+ulint
+log_block_get_checkpoint_no(
+/*========================*/
+	const byte*	log_block);	/*!< in: log block */
+/************************************************************//**
+Initializes a log block in the log buffer: stores the block number
+derived from lsn in the header, sets the data length to LOG_BLOCK_HDR_SIZE
+and the first mtr log record group offset to 0. */
+UNIV_INLINE
+void
+log_block_init(
+/*===========*/
+	byte*	log_block,	/*!< in/out: pointer to the log block in the
+				log buffer; its header fields are written */
+	lsn_t	lsn);		/*!< in: lsn within the log block */
+/************************************************************//**
+Initializes a log block in the log buffer in the old, < 3.23.52 format, where
+there was no checksum yet: the block number is stored both in the header and
+in the trailer field that later versions use for the checksum. */
+UNIV_INLINE
+void
+log_block_init_in_old_format(
+/*=========================*/
+	byte*	log_block,	/*!< in/out: pointer to the log block in the
+				log buffer; its header and trailer fields
+				are written */
+	lsn_t	lsn);		/*!< in: lsn within the log block */
+/************************************************************//**
+Converts a lsn to a log block number.
+@return	log block number, it is > 0 and <= 1G */
+UNIV_INLINE
+ulint
+log_block_convert_lsn_to_no(
+/*========================*/
+	lsn_t	lsn);	/*!< in: lsn of a byte within the block */
+/******************************************************//**
+Prints info of the log. */
+UNIV_INTERN
+void
+log_print(
+/*======*/
+	FILE*	file);	/*!< in: file where to print */
+/******************************************************//**
+Peeks the current lsn.
+@return	TRUE if success, FALSE if could not get the log system mutex */
+UNIV_INTERN
+ibool
+log_peek_lsn(
+/*=========*/
+	lsn_t*	lsn);	/*!< out: if returns TRUE, current lsn is here */
+/**********************************************************************//**
+Refreshes the statistics used to print per-second averages. */
+UNIV_INTERN
+void
+log_refresh_stats(void);
+/*===================*/
+/********************************************************//**
+Closes all log groups. */
+UNIV_INTERN
+void
+log_group_close_all(void);
+/*=====================*/
+/********************************************************//**
+Shutdown the log system but do not release all the memory. */
+UNIV_INTERN
+void
+log_shutdown(void);
+/*==============*/
+/********************************************************//**
+Free the log system data structures. */
+UNIV_INTERN
+void
+log_mem_free(void);
+/*==============*/
+
+extern log_t*	log_sys;
+
+/* Values used as flags */
+#define LOG_FLUSH	7652559
+#define LOG_CHECKPOINT	78656949
+#ifdef UNIV_LOG_ARCHIVE
+# define LOG_ARCHIVE	11122331
+#endif /* UNIV_LOG_ARCHIVE */
+#define LOG_RECOVER	98887331
+
+/* The counting of lsn's starts from this value: this must be non-zero */
+#define LOG_START_LSN		((lsn_t) (16 * OS_FILE_LOG_BLOCK_SIZE))
+
+#define LOG_BUFFER_SIZE		(srv_log_buffer_size * UNIV_PAGE_SIZE)
+#define LOG_ARCHIVE_BUF_SIZE	(srv_log_buffer_size * UNIV_PAGE_SIZE / 4)
+
+/* Offsets of a log block header */
+#define	LOG_BLOCK_HDR_NO	0	/* block number which must be > 0 and
+					is allowed to wrap around at 2G
+					(log_block_set_hdr_no() asserts that
+					it is < LOG_BLOCK_FLUSH_BIT_MASK; the
+					numbers generated by
+					log_block_convert_lsn_to_no() already
+					wrap around at 1G); the highest bit is
+					set to 1 if this is the first log
+					block in a log flush write segment */
+#define LOG_BLOCK_FLUSH_BIT_MASK 0x80000000UL
+					/* mask used to get the highest bit in
+					the preceding 4-byte field */
+#define	LOG_BLOCK_HDR_DATA_LEN	4	/* number of bytes of log written to
+					this block */
+#define	LOG_BLOCK_FIRST_REC_GROUP 6	/* offset of the first start of an
+					mtr log record group in this log block,
+					0 if none; if the value is the same
+					as LOG_BLOCK_HDR_DATA_LEN, it means
+					that the first rec group has not yet
+					been catenated to this log block, but
+					if it will, it will start at this
+					offset; an archive recovery can
+					start parsing the log records starting
+					from this offset in this log block,
+					if value not 0 */
+#define LOG_BLOCK_CHECKPOINT_NO	8	/* 4 lower bytes of the value of
+					log_sys->next_checkpoint_no when the
+					log block was last written to: if the
+					block has not yet been written full,
+					this value is only updated before a
+					log buffer flush */
+#define LOG_BLOCK_HDR_SIZE	12	/* size of the log block header in
+					bytes */
+
+/* Offsets of a log block trailer from the end of the block */
+#define	LOG_BLOCK_CHECKSUM	4	/* 4 byte checksum of the log block
+					contents; in InnoDB versions
+					< 3.23.52 this did not contain the
+					checksum but the same value as
+					.._HDR_NO */
+#define	LOG_BLOCK_TRL_SIZE	4	/* trailer size in bytes */
+
+/* Offsets for a checkpoint field */
+#define LOG_CHECKPOINT_NO		0
+#define LOG_CHECKPOINT_LSN		8
+#define LOG_CHECKPOINT_OFFSET_LOW32	16
+#define LOG_CHECKPOINT_LOG_BUF_SIZE	20
+#define	LOG_CHECKPOINT_ARCHIVED_LSN	24
+#define	LOG_CHECKPOINT_GROUP_ARRAY	32
+
+/* For each value smaller than LOG_MAX_N_GROUPS the following 8 bytes: */
+
+#define LOG_CHECKPOINT_ARCHIVED_FILE_NO	0
+#define LOG_CHECKPOINT_ARCHIVED_OFFSET	4
+
+#define	LOG_CHECKPOINT_ARRAY_END	(LOG_CHECKPOINT_GROUP_ARRAY\
+							+ LOG_MAX_N_GROUPS * 8)
+#define LOG_CHECKPOINT_CHECKSUM_1	LOG_CHECKPOINT_ARRAY_END
+#define LOG_CHECKPOINT_CHECKSUM_2	(4 + LOG_CHECKPOINT_ARRAY_END)
+#if 0
+#define LOG_CHECKPOINT_FSP_FREE_LIMIT	(8 + LOG_CHECKPOINT_ARRAY_END)
+					/*!< Not used (0);
+					This used to contain the
+					current fsp free limit in
+					tablespace 0, in units of one
+					megabyte.
+
+					This information might have been used
+					since mysqlbackup version 0.35 but
+					before 1.41 to decide if unused ends of
+					non-auto-extending data files
+					in space 0 can be truncated.
+
+					This information was made obsolete
+					by mysqlbackup --compress. */
+#define LOG_CHECKPOINT_FSP_MAGIC_N	(12 + LOG_CHECKPOINT_ARRAY_END)
+					/*!< Not used (0);
+					This magic number tells if the
+					checkpoint contains the above field:
+					the field was added to
+					InnoDB-3.23.50 and
+					removed from MySQL 5.6 */
+#define LOG_CHECKPOINT_FSP_MAGIC_N_VAL	1441231243
+					/*!< if LOG_CHECKPOINT_FSP_MAGIC_N
+					contains this value, then
+					LOG_CHECKPOINT_FSP_FREE_LIMIT
+					is valid */
+#endif
+#define LOG_CHECKPOINT_OFFSET_HIGH32	(16 + LOG_CHECKPOINT_ARRAY_END)
+#define LOG_CHECKPOINT_SIZE		(20 + LOG_CHECKPOINT_ARRAY_END)
+
+
+/* Offsets of a log file header */
+#define LOG_GROUP_ID		0	/* log group number */
+#define LOG_FILE_START_LSN	4	/* lsn of the start of data in this
+					log file */
+#define LOG_FILE_NO		12	/* 4-byte archived log file number;
+					this field is only defined in an
+					archived log file */
+#define LOG_FILE_WAS_CREATED_BY_HOT_BACKUP 16
+					/* a 32-byte field which contains
+					the string 'ibbackup' and the
+					creation time if the log file was
+					created by mysqlbackup --restore;
+					when mysqld is first time started
+					on the restored database, it can
+					print helpful info for the user */
+#define	LOG_FILE_ARCH_COMPLETED	OS_FILE_LOG_BLOCK_SIZE
+					/* this 4-byte field is TRUE when
+					the writing of an archived log file
+					has been completed; this field is
+					only defined in an archived log file */
+#define LOG_FILE_END_LSN	(OS_FILE_LOG_BLOCK_SIZE + 4)
+					/* lsn where the archived log file
+					at least extends: actually the
+					archived log file may extend to a
+					later lsn, as long as it is within the
+					same log block as this lsn; this field
+					is defined only when an archived log
+					file has been completely written */
+#define LOG_CHECKPOINT_1	OS_FILE_LOG_BLOCK_SIZE
+					/* first checkpoint field in the log
+					header; we write alternately to the
+					checkpoint fields when we make new
+					checkpoints; this field is only defined
+					in the first log file of a log group */
+#define LOG_CHECKPOINT_2	(3 * OS_FILE_LOG_BLOCK_SIZE)
+					/* second checkpoint field in the log
+					header */
+#define LOG_FILE_HDR_SIZE	(4 * OS_FILE_LOG_BLOCK_SIZE)
+
+#define LOG_GROUP_OK		301
+#define LOG_GROUP_CORRUPTED	302
+
+/** Log group consists of a number of log files, each of the same size; a log
+group is implemented as a space in the sense of the module fil0fil.
+When UNIV_LOG_ARCHIVE is defined, the group also carries the bookkeeping
+fields of its archive. */
+struct log_group_t{
+	/* The following fields are protected by log_sys->mutex */
+	ulint		id;		/*!< log group id */
+	ulint		n_files;	/*!< number of files in the group */
+	lsn_t		file_size;	/*!< individual log file size in bytes,
+					including the log file header */
+	ulint		space_id;	/*!< file space which implements the log
+					group */
+	ulint		state;		/*!< LOG_GROUP_OK or
+					LOG_GROUP_CORRUPTED */
+	lsn_t		lsn;		/*!< lsn used to fix coordinates within
+					the log group */
+	lsn_t		lsn_offset;	/*!< the offset of the above lsn */
+	ulint		n_pending_writes;/*!< number of currently pending flush
+					writes for this log group */
+	byte**		file_header_bufs_ptr;/*!< unaligned buffers;
+					NOTE(review): presumably the raw
+					allocations from which the entries of
+					file_header_bufs are aligned - confirm
+					against the allocation code */
+	byte**		file_header_bufs;/*!< buffers for each file
+					header in the group */
+#ifdef UNIV_LOG_ARCHIVE
+	/*-----------------------------*/
+	byte**		archive_file_header_bufs_ptr;/*!< unaligned buffers;
+					NOTE(review): presumably the raw
+					allocations behind
+					archive_file_header_bufs - confirm */
+	byte**		archive_file_header_bufs;/*!< buffers for each file
+					header in the group */
+	ulint		archive_space_id;/*!< file space which
+					implements the log group
+					archive */
+	ulint		archived_file_no;/*!< file number corresponding to
+					log_sys->archived_lsn */
+	ulint		archived_offset;/*!< file offset corresponding to
+					log_sys->archived_lsn, 0 if we have
+					not yet written to the archive file
+					number archived_file_no */
+	ulint		next_archived_file_no;/*!< during an archive write,
+					until the write is completed, we
+					store the next value for
+					archived_file_no here: the write
+					completion function then sets the new
+					value to ..._file_no */
+	ulint		next_archived_offset; /*!< like the preceding field,
+					but for archived_offset */
+#endif /* UNIV_LOG_ARCHIVE */
+	/*-----------------------------*/
+	lsn_t		scanned_lsn;	/*!< used only in recovery: recovery scan
+					succeeded up to this lsn in this log
+					group */
+	byte*		checkpoint_buf_ptr;/*!< unaligned checkpoint header */
+	byte*		checkpoint_buf;	/*!< checkpoint header is written from
+					this buffer to the group */
+	UT_LIST_NODE_T(log_group_t)
+			log_groups;	/*!< list of log groups */
+};
+
+/** Redo log buffer */
+struct log_t{
+	byte		pad[64];	/*!< padding to prevent other memory
+					update hotspots from residing on the
+					same memory cache line */
+	lsn_t		lsn;		/*!< log sequence number */
+	ulint		buf_free;	/*!< first free offset within the log
+					buffer */
+#ifndef UNIV_HOTBACKUP
+	ib_mutex_t		mutex;		/*!< mutex protecting the log */
+
+	ib_mutex_t		log_flush_order_mutex;/*!< mutex to serialize access to
+					the flush list when we are putting
+					dirty blocks in the list. The idea
+					behind this mutex is to be able
+					to release log_sys->mutex during
+					mtr_commit and still ensure that
+					insertions in the flush_list happen
+					in the LSN order. */
+#endif /* !UNIV_HOTBACKUP */
+	byte*		buf_ptr;	/*!< unaligned log buffer */
+	byte*		buf;		/*!< log buffer */
+	ulint		buf_size;	/*!< log buffer size in bytes */
+	ulint		max_buf_free;	/*!< recommended maximum value of
+					buf_free, after which the buffer is
+					flushed */
+ #ifdef UNIV_LOG_DEBUG
+	ulint		old_buf_free;	/*!< value of buf_free when log was
+					last time opened; only in the debug
+					version */
+	ib_uint64_t	old_lsn;	/*!< value of lsn when log was
+					last time opened; only in the
+					debug version */
+#endif /* UNIV_LOG_DEBUG */
+	ibool		check_flush_or_checkpoint;
+					/*!< this is set to TRUE when there may
+					be need to flush the log buffer, or
+					preflush buffer pool pages, or make
+					a checkpoint; this MUST be TRUE when
+					lsn - last_checkpoint_lsn >
+					max_checkpoint_age; this flag is
+					peeked at by log_free_check(), which
+					does not reserve the log mutex */
+	UT_LIST_BASE_NODE_T(log_group_t)
+			log_groups;	/*!< log groups */
+
+#ifndef UNIV_HOTBACKUP
+	/** The fields involved in the log buffer flush @{ */
+
+	ulint		buf_next_to_write;/*!< first offset in the log buffer
+					where the byte content may not exist
+					written to file, e.g., the start
+					offset of a log record catenated
+					later; this is advanced when a flush
+					operation is completed to all the log
+					groups */
+	volatile bool	is_extending;	/*!< this is set to true while the
+					log buffer size is being extended */
+	lsn_t		written_to_some_lsn;
+					/*!< first log sequence number not yet
+					written to any log group; for this to
+					be advanced, it is enough that the
+					write i/o has been completed for any
+					one log group */
+	lsn_t		written_to_all_lsn;
+					/*!< first log sequence number not yet
+					written to some log group; for this to
+					be advanced, it is enough that the
+					write i/o has been completed for all
+					log groups.
+					Note that since InnoDB currently
+					has only one log group, this value
+					is redundant. Also, it is possible
+					that this value falls behind
+					flushed_to_disk_lsn transiently.
+					It is appropriate to use either
+					flushed_to_disk_lsn or
+					write_lsn, which are always
+					up-to-date and accurate. */
+	lsn_t		write_lsn;	/*!< end lsn for the current running
+					write */
+	ulint		write_end_offset;/*!< the data in buffer has
+					been written up to this offset
+					when the current write ends:
+					this field will then be copied
+					to buf_next_to_write */
+	lsn_t		current_flush_lsn;/*!< end lsn for the current running
+					write + flush operation */
+	lsn_t		flushed_to_disk_lsn;
+					/*!< how far we have written the log
+					AND flushed to disk */
+	ulint		n_pending_writes;/*!< number of currently
+					pending flushes or writes */
+	/* NOTE on the 'flush' in names of the fields below: starting from
+	4.0.14, we separate the write of the log file and the actual fsync()
+	or other method to flush it to disk. The names below should really
+	be 'flush_or_write'! */
+	os_event_t	no_flush_event;	/*!< this event is in the reset state
+					when a flush or a write is running;
+					a thread should wait for this without
+					owning the log mutex, but NOTE that
+					to set or reset this event, the
+					thread MUST own the log mutex! */
+	ibool		one_flushed;	/*!< during a flush, this is
+					first FALSE and becomes TRUE
+					when one log group has been
+					written or flushed */
+	os_event_t	one_flushed_event;/*!< this event is reset when the
+					flush or write has not yet completed
+					for any log group; e.g., this means
+					that a transaction has been committed
+					when this is set; a thread should wait
+					for this without owning the log mutex,
+					but NOTE that to set or reset this
+					event, the thread MUST own the log
+					mutex! */
+	ulint		n_log_ios;	/*!< number of log i/os initiated thus
+					far */
+	ulint		n_log_ios_old;	/*!< number of log i/o's at the
+					previous printout */
+	time_t		last_printout_time;/*!< when log_print was last time
+					called */
+	/* @} */
+
+	/** Fields involved in checkpoints @{ */
+	lsn_t		log_group_capacity; /*!< capacity of the log group; if
+					the checkpoint age exceeds this, it is
+					a serious error because it is possible
+					we will then overwrite log and spoil
+					crash recovery */
+	lsn_t		max_modified_age_async;
+					/*!< when this recommended
+					value for lsn -
+					buf_pool_get_oldest_modification()
+					is exceeded, we start an
+					asynchronous preflush of pool pages */
+	lsn_t		max_modified_age_sync;
+					/*!< when this recommended
+					value for lsn -
+					buf_pool_get_oldest_modification()
+					is exceeded, we start a
+					synchronous preflush of pool pages */
+	lsn_t		max_checkpoint_age_async;
+					/*!< when this checkpoint age
+					is exceeded we start an
+					asynchronous writing of a new
+					checkpoint */
+	lsn_t		max_checkpoint_age;
+					/*!< this is the maximum allowed value
+					for lsn - last_checkpoint_lsn when a
+					new query step is started */
+	ib_uint64_t	next_checkpoint_no;
+					/*!< next checkpoint number */
+	lsn_t		last_checkpoint_lsn;
+					/*!< latest checkpoint lsn */
+	lsn_t		next_checkpoint_lsn;
+					/*!< next checkpoint lsn */
+	ulint		n_pending_checkpoint_writes;
+					/*!< number of currently pending
+					checkpoint writes */
+	rw_lock_t	checkpoint_lock;/*!< this latch is x-locked when a
+					checkpoint write is running; a thread
+					should wait for this without owning
+					the log mutex */
+#endif /* !UNIV_HOTBACKUP */
+	byte*		checkpoint_buf_ptr;/*!< unaligned checkpoint header */
+	byte*		checkpoint_buf;	/*!< checkpoint header is read to this
+					buffer */
+	/* @} */
+#ifdef UNIV_LOG_ARCHIVE
+	/** Fields involved in archiving @{ */
+	ulint		archiving_state;/*!< LOG_ARCH_ON, LOG_ARCH_STOPPING,
+					LOG_ARCH_STOPPED, LOG_ARCH_OFF */
+	lsn_t		archived_lsn;	/*!< archiving has advanced to this
+					lsn */
+	lsn_t		max_archived_lsn_age_async;
+					/*!< recommended maximum age of
+					archived_lsn, before we start
+					asynchronous copying to the archive */
+	lsn_t		max_archived_lsn_age;
+					/*!< maximum allowed age for
+					archived_lsn */
+	lsn_t		next_archived_lsn;/*!< during an archive write,
+					until the write is completed, we
+					store the next value for
+					archived_lsn here: the write
+					completion function then sets the new
+					value to archived_lsn */
+	ulint		archiving_phase;/*!< LOG_ARCHIVE_READ or
+					LOG_ARCHIVE_WRITE */
+	ulint		n_pending_archive_ios;
+					/*!< number of currently pending reads
+					or writes in archiving */
+	rw_lock_t	archive_lock;	/*!< this latch is x-locked when an
+					archive write is running; a thread
+					should wait for this without owning
+					the log mutex */
+	ulint		archive_buf_size;/*!< size of archive_buf */
+	byte*		archive_buf;	/*!< log segment is written to the
+					archive from this buffer */
+	os_event_t	archiving_on;	/*!< if archiving has been stopped,
+					a thread can wait for this event to
+					become signaled */
+	/* @} */
+#endif /* UNIV_LOG_ARCHIVE */
+};
+
+/** Test whether log_sys->log_flush_order_mutex is owned, as reported by
+mutex_own(). */
+#define log_flush_order_mutex_own()				\
+	(mutex_own(&log_sys->log_flush_order_mutex))
+
+/** Acquire log_sys->log_flush_order_mutex. */
+#define log_flush_order_mutex_enter()				\
+	do {							\
+		mutex_enter(&log_sys->log_flush_order_mutex);	\
+	} while (0)
+
+/** Release log_sys->log_flush_order_mutex. */
+#define log_flush_order_mutex_exit()				\
+	do {							\
+		mutex_exit(&log_sys->log_flush_order_mutex);	\
+	} while (0)
+
+#ifdef UNIV_LOG_ARCHIVE
+/** Archiving state @{ */
+#define LOG_ARCH_ON		71
+#define LOG_ARCH_STOPPING	72
+#define LOG_ARCH_STOPPING2	73
+#define LOG_ARCH_STOPPED	74
+#define LOG_ARCH_OFF		75
+/* @} */
+#endif /* UNIV_LOG_ARCHIVE */
+
+#ifndef UNIV_NONINL
+#include "log0log.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/log0log.ic b/storage/innobase/include/log0log.ic
new file mode 100644
index 00000000000..9fc12f766bf
--- /dev/null
+++ b/storage/innobase/include/log0log.ic
@@ -0,0 +1,462 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2010, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/log0log.ic
+Database log
+
+Created 12/9/1995 Heikki Tuuri
+*******************************************************/
+
+#include "os0file.h"
+#include "mach0data.h"
+#include "mtr0mtr.h"
+#include "srv0mon.h"
+
+#ifdef UNIV_LOG_DEBUG
+/******************************************************//**
+Checks by parsing that the catenated log segment for a single mtr is
+consistent. */
+UNIV_INTERN
+ibool
+log_check_log_recs(
+/*===============*/
+	const byte*	buf,		/*!< in: pointer to the start of
+					the log segment in the
+					log_sys->buf log buffer */
+	ulint		len,		/*!< in: segment length in bytes */
+	ib_uint64_t	buf_start_lsn);	/*!< in: buffer start lsn */
+#endif /* UNIV_LOG_DEBUG */
+
+/************************************************************//**
+Tells whether the flush bit is set in the header of a log block. The bit
+is selected from the 4-byte LOG_BLOCK_HDR_NO field with
+LOG_BLOCK_FLUSH_BIT_MASK.
+@return	TRUE if this block was the first to be written in a log flush */
+UNIV_INLINE
+ibool
+log_block_get_flush_bit(
+/*====================*/
+	const byte*	log_block)	/*!< in: log block */
+{
+	const ulint	hdr_no_field
+		= mach_read_from_4(log_block + LOG_BLOCK_HDR_NO);
+
+	return((hdr_no_field & LOG_BLOCK_FLUSH_BIT_MASK) ? TRUE : FALSE);
+}
+
+/************************************************************//**
+Sets or clears the flush bit of a log block, leaving the other bits of the
+LOG_BLOCK_HDR_NO field as they were. */
+UNIV_INLINE
+void
+log_block_set_flush_bit(
+/*====================*/
+	byte*	log_block,	/*!< in/out: log block */
+	ibool	val)		/*!< in: value to set */
+{
+	const ulint	old_field
+		= mach_read_from_4(log_block + LOG_BLOCK_HDR_NO);
+	const ulint	new_field = val
+		? (old_field | LOG_BLOCK_FLUSH_BIT_MASK)
+		: (old_field & ~LOG_BLOCK_FLUSH_BIT_MASK);
+
+	mach_write_to_4(log_block + LOG_BLOCK_HDR_NO, new_field);
+}
+
+/************************************************************//**
+Reads the log block number from the block header, with the flush bit
+masked out.
+@return	log block number stored in the block header */
+UNIV_INLINE
+ulint
+log_block_get_hdr_no(
+/*=================*/
+	const byte*	log_block)	/*!< in: log block */
+{
+	const ulint	hdr_no_field
+		= mach_read_from_4(log_block + LOG_BLOCK_HDR_NO);
+
+	/* The flush bit shares the field with the number: strip it. */
+	return(hdr_no_field & ~LOG_BLOCK_FLUSH_BIT_MASK);
+}
+
+/************************************************************//**
+Stores the log block number in the block header. The whole 4-byte field is
+overwritten with n, so NOTE that this must be called before the flush bit
+is set! */
+UNIV_INLINE
+void
+log_block_set_hdr_no(
+/*=================*/
+	byte*	log_block,	/*!< in/out: log block */
+	ulint	n)		/*!< in: log block number: must be > 0 and
+				< LOG_BLOCK_FLUSH_BIT_MASK */
+{
+	byte*	hdr_no_field = log_block + LOG_BLOCK_HDR_NO;
+
+	ut_ad(n > 0);
+	ut_ad(n < LOG_BLOCK_FLUSH_BIT_MASK);
+
+	mach_write_to_4(hdr_no_field, n);
+}
+
+/************************************************************//**
+Reads the 2-byte data length field of a log block header.
+@return	log block data length measured as a byte offset from the block start */
+UNIV_INLINE
+ulint
+log_block_get_data_len(
+/*===================*/
+	const byte*	log_block)	/*!< in: log block */
+{
+	const byte*	len_field = log_block + LOG_BLOCK_HDR_DATA_LEN;
+
+	return(mach_read_from_2(len_field));
+}
+
+/************************************************************//**
+Writes the 2-byte data length field of a log block header. */
+UNIV_INLINE
+void
+log_block_set_data_len(
+/*===================*/
+	byte*	log_block,	/*!< in/out: log block */
+	ulint	len)		/*!< in: data length */
+{
+	byte*	len_field = log_block + LOG_BLOCK_HDR_DATA_LEN;
+
+	mach_write_to_2(len_field, len);
+}
+
+/************************************************************//**
+Reads the 2-byte field of a log block header that holds the offset of the
+first mtr log record group.
+@return first mtr log record group byte offset from the block start, 0
+if none */
+UNIV_INLINE
+ulint
+log_block_get_first_rec_group(
+/*==========================*/
+	const byte*	log_block)	/*!< in: log block */
+{
+	const byte*	group_field = log_block + LOG_BLOCK_FIRST_REC_GROUP;
+
+	return(mach_read_from_2(group_field));
+}
+
+/************************************************************//**
+Writes the 2-byte field of a log block header that holds the offset of the
+first mtr log record group. */
+UNIV_INLINE
+void
+log_block_set_first_rec_group(
+/*==========================*/
+	byte*	log_block,	/*!< in/out: log block */
+	ulint	offset)		/*!< in: offset, 0 if none */
+{
+	byte*	group_field = log_block + LOG_BLOCK_FIRST_REC_GROUP;
+
+	mach_write_to_2(group_field, offset);
+}
+
+/************************************************************//**
+Reads the checkpoint number field of a log block header; the field holds
+only the 4 lowest bytes of the checkpoint number.
+@return	checkpoint no (4 lowest bytes) */
+UNIV_INLINE
+ulint
+log_block_get_checkpoint_no(
+/*========================*/
+	const byte*	log_block)	/*!< in: log block */
+{
+	const byte*	cp_no_field = log_block + LOG_BLOCK_CHECKPOINT_NO;
+
+	return(mach_read_from_4(cp_no_field));
+}
+
+/************************************************************//**
+Writes the checkpoint number field of a log block header (4 lowest
+bytes). */
+UNIV_INLINE
+void
+log_block_set_checkpoint_no(
+/*========================*/
+	byte*		log_block,	/*!< in/out: log block */
+	ib_uint64_t	no)		/*!< in: checkpoint no */
+{
+	byte*		cp_no_field = log_block + LOG_BLOCK_CHECKPOINT_NO;
+	const ulint	no_as_ulint = (ulint) no;
+
+	mach_write_to_4(cp_no_field, no_as_ulint);
+}
+
+/************************************************************//**
+Converts a lsn to a log block number: the index of the
+OS_FILE_LOG_BLOCK_SIZE-sized block containing the lsn, reduced to its
+30 lowest bits, plus one.
+@return	log block number, it is > 0 and <= 1G */
+UNIV_INLINE
+ulint
+log_block_convert_lsn_to_no(
+/*========================*/
+	lsn_t	lsn)	/*!< in: lsn of a byte within the block */
+{
+	const ulint	block_index = (ulint) (lsn / OS_FILE_LOG_BLOCK_SIZE);
+
+	/* The + 1 keeps the result non-zero after the wrap-around. */
+	return((block_index & 0x3FFFFFFFUL) + 1);
+}
+
+/************************************************************//**
+Calculates the checksum for a log block over every byte of the block
+except the LOG_BLOCK_TRL_SIZE bytes of the trailer.
+@return	checksum */
+UNIV_INLINE
+ulint
+log_block_calc_checksum(
+/*====================*/
+	const byte*	block)	/*!< in: log block */
+{
+	const byte* const	end = block
+		+ OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
+	ulint			checksum = 1;
+	ulint			shift = 0;
+
+	for (const byte* ptr = block; ptr < end; ptr++) {
+		const ulint	b = (ulint) *ptr;
+
+		/* The running sum is cut to 31 bits before each byte is
+		added, once as such and once shifted left. */
+		checksum = (checksum & 0x7FFFFFFFUL) + b + (b << shift);
+
+		/* The shift cycles through 0..24. */
+		shift = (shift >= 24) ? 0 : shift + 1;
+	}
+
+	return(checksum);
+}
+
+/************************************************************//**
+Reads the 4-byte checksum field, which lies LOG_BLOCK_CHECKSUM bytes
+before the end of the log block.
+@return	checksum */
+UNIV_INLINE
+ulint
+log_block_get_checksum(
+/*===================*/
+	const byte*	log_block)	/*!< in: log block */
+{
+	const byte*	checksum_field = log_block
+		+ OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_CHECKSUM;
+
+	return(mach_read_from_4(checksum_field));
+}
+
+/************************************************************//**
+Writes the 4-byte checksum field, which lies LOG_BLOCK_CHECKSUM bytes
+before the end of the log block. */
+UNIV_INLINE
+void
+log_block_set_checksum(
+/*===================*/
+	byte*	log_block,	/*!< in/out: log block */
+	ulint	checksum)	/*!< in: checksum */
+{
+	byte*	checksum_field = log_block
+		+ OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_CHECKSUM;
+
+	mach_write_to_4(checksum_field, checksum);
+}
+
+/************************************************************//**
+Initializes a log block in the log buffer: the header gets the block
+number derived from lsn, a data length of LOG_BLOCK_HDR_SIZE and a first
+mtr log record group offset of 0. */
+UNIV_INLINE
+void
+log_block_init(
+/*===========*/
+	byte*	log_block,	/*!< in/out: pointer to the log block in the
+				log buffer */
+	lsn_t	lsn)		/*!< in: lsn within the log block */
+{
+	ut_ad(mutex_own(&(log_sys->mutex)));
+
+	log_block_set_hdr_no(log_block, log_block_convert_lsn_to_no(lsn));
+	log_block_set_data_len(log_block, LOG_BLOCK_HDR_SIZE);
+	log_block_set_first_rec_group(log_block, 0);
+}
+
+/************************************************************//**
+Initializes a log block in the log buffer in the old format, where there
+was no checksum yet: the trailer field that now holds the checksum is
+filled with the block number instead. */
+UNIV_INLINE
+void
+log_block_init_in_old_format(
+/*=========================*/
+	byte*	log_block,	/*!< in/out: pointer to the log block in the
+				log buffer */
+	lsn_t	lsn)		/*!< in: lsn within the log block */
+{
+	ut_ad(mutex_own(&(log_sys->mutex)));
+
+	const ulint	block_no = log_block_convert_lsn_to_no(lsn);
+
+	log_block_set_hdr_no(log_block, block_no);
+
+	/* Same bytes as the checksum field of the current format. */
+	log_block_set_checksum(log_block, block_no);
+
+	log_block_set_data_len(log_block, LOG_BLOCK_HDR_SIZE);
+	log_block_set_first_rec_group(log_block, 0);
+}
+
+#ifndef UNIV_HOTBACKUP
+/************************************************************//**
+Writes to the log the string given, provided that it fits in the current
+log block without filling the block up. On success the function returns
+with log_sys->mutex still held, and the log must be released with
+log_release. On failure nothing is written, *start_lsn is not set and
+log_sys->mutex has already been released.
+@return	end lsn of the log record, zero if did not succeed */
+UNIV_INLINE
+lsn_t
+log_reserve_and_write_fast(
+/*=======================*/
+	const void*	str,	/*!< in: string */
+	ulint		len,	/*!< in: string length */
+	lsn_t*		start_lsn)/*!< out: start lsn of the log record;
+				set only if the write succeeds */
+{
+	ulint		data_len;
+#ifdef UNIV_LOG_LSN_DEBUG
+	/* length of the LSN pseudo-record */
+	ulint		lsn_len;
+#endif /* UNIV_LOG_LSN_DEBUG */
+
+	mutex_enter(&log_sys->mutex);
+#ifdef UNIV_LOG_LSN_DEBUG
+	lsn_len = 1
+		+ mach_get_compressed_size(log_sys->lsn >> 32)
+		+ mach_get_compressed_size(log_sys->lsn & 0xFFFFFFFFUL);
+#endif /* UNIV_LOG_LSN_DEBUG */
+
+	/* Offset, counted from the start of the current log block, at which
+	the data would end once the string (and, in the LSN debug build, the
+	pseudo-record) has been appended. */
+	data_len = len
+#ifdef UNIV_LOG_LSN_DEBUG
+		+ lsn_len
+#endif /* UNIV_LOG_LSN_DEBUG */
+		+ log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE;
+
+	if (data_len >= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
+
+		/* The string does not fit within the current log block
+		or the log block would become full */
+
+		mutex_exit(&log_sys->mutex);
+
+		return(0);
+	}
+
+	*start_lsn = log_sys->lsn;
+
+#ifdef UNIV_LOG_LSN_DEBUG
+	{
+		/* Write the LSN pseudo-record. */
+		byte* b = &log_sys->buf[log_sys->buf_free];
+		*b++ = MLOG_LSN | (MLOG_SINGLE_REC_FLAG & *(const byte*) str);
+		/* Write the LSN in two parts,
+		as a pseudo page number and space id. */
+		b += mach_write_compressed(b, log_sys->lsn >> 32);
+		b += mach_write_compressed(b, log_sys->lsn & 0xFFFFFFFFUL);
+		ut_a(b - lsn_len == &log_sys->buf[log_sys->buf_free]);
+
+		memcpy(b, str, len);
+		/* From here on len also covers the pseudo-record. */
+		len += lsn_len;
+	}
+#else /* UNIV_LOG_LSN_DEBUG */
+	memcpy(log_sys->buf + log_sys->buf_free, str, len);
+#endif /* UNIV_LOG_LSN_DEBUG */
+
+	/* Store the new data length in the header of the log block that
+	contains buf_free. */
+	log_block_set_data_len((byte*) ut_align_down(log_sys->buf
+						     + log_sys->buf_free,
+						     OS_FILE_LOG_BLOCK_SIZE),
+			       data_len);
+#ifdef UNIV_LOG_DEBUG
+	log_sys->old_buf_free = log_sys->buf_free;
+	log_sys->old_lsn = log_sys->lsn;
+#endif
+	log_sys->buf_free += len;
+
+	ut_ad(log_sys->buf_free <= log_sys->buf_size);
+
+	log_sys->lsn += len;
+
+	MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
+		    log_sys->lsn - log_sys->last_checkpoint_lsn);
+
+#ifdef UNIV_LOG_DEBUG
+	log_check_log_recs(log_sys->buf + log_sys->old_buf_free,
+			   log_sys->buf_free - log_sys->old_buf_free,
+			   log_sys->old_lsn);
+#endif
+	/* The log mutex is intentionally still held here. */
+	return(log_sys->lsn);
+}
+
+/***********************************************************************//**
+Releases log_sys->mutex. */
+UNIV_INLINE
+void
+log_release(void)
+/*=============*/
+{
+	mutex_exit(&log_sys->mutex);
+}
+
+/************************************************************//**
+Gets the current lsn; log_sys->lsn is read while holding log_sys->mutex.
+@return	current lsn */
+UNIV_INLINE
+lsn_t
+log_get_lsn(void)
+/*=============*/
+{
+	mutex_enter(&log_sys->mutex);
+
+	const lsn_t	current_lsn = log_sys->lsn;
+
+	mutex_exit(&log_sys->mutex);
+
+	return(current_lsn);
+}
+
+/************************************************************//**
+Gets the log group capacity, log_sys->log_group_capacity. It is OK to read
+the value without holding log_sys->mutex because it is constant.
+@return	log group capacity */
+UNIV_INLINE
+lsn_t
+log_get_capacity(void)
+/*==================*/
+{
+	const lsn_t	capacity = log_sys->log_group_capacity;
+
+	return(capacity);
+}
+
+/************************************************************//**
+Gets log_sys->max_modified_age_async. It is OK to read the value without
+holding log_sys->mutex because it is constant.
+@return	max_modified_age_async */
+UNIV_INLINE
+lsn_t
+log_get_max_modified_age_async(void)
+/*================================*/
+{
+	const lsn_t	max_age = log_sys->max_modified_age_async;
+
+	return(max_age);
+}
+
+/***********************************************************************//**
+Checks if there is need for a log buffer flush or a new checkpoint, and does
+this if yes, by calling log_check_margins(). The check is a read of
+log_sys->check_flush_or_checkpoint without taking the log mutex. Any
+database operation should call this when it has modified more than about
+4 pages. NOTE that this function may only be called when the OS thread
+owns no synchronization objects except the dictionary mutex. */
+UNIV_INLINE
+void
+log_free_check(void)
+/*================*/
+{
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(sync_thread_levels_empty_except_dict());
+#endif /* UNIV_SYNC_DEBUG */
+
+	if (!log_sys->check_flush_or_checkpoint) {
+
+		return;
+	}
+
+	log_check_margins();
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h
new file mode 100644
index 00000000000..8ede49d4ecc
--- /dev/null
+++ b/storage/innobase/include/log0recv.h
@@ -0,0 +1,505 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/log0recv.h
+Recovery
+
+Created 9/20/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef log0recv_h
+#define log0recv_h
+
+#include "univ.i"
+#include "ut0byte.h"
+#include "buf0types.h"
+#include "hash0hash.h"
+#include "log0log.h"
+#include <list>
+
+#ifdef UNIV_HOTBACKUP
+extern ibool	recv_replay_file_ops;
+
+/*******************************************************************//**
+Reads the checkpoint info needed in hot backup.
+@return	TRUE if success */
+UNIV_INTERN
+ibool
+recv_read_checkpoint_info_for_backup(
+/*=================================*/
+	const byte*	hdr,	/*!< in: buffer containing the log group
+				header */
+	lsn_t*		lsn,	/*!< out: checkpoint lsn */
+	lsn_t*		offset,	/*!< out: checkpoint offset in the log group */
+	lsn_t*		cp_no,	/*!< out: checkpoint number */
+	lsn_t*		first_header_lsn)
+				/*!< out: lsn of the start of the
+				first log file */
+	__attribute__((nonnull));
+/*******************************************************************//**
+Scans the log segment and n_bytes_scanned is set to the length of valid
+log scanned. */
+UNIV_INTERN
+void
+recv_scan_log_seg_for_backup(
+/*=========================*/
+	byte*		buf,		/*!< in: buffer containing log data */
+	ulint		buf_len,	/*!< in: data length in that buffer */
+	lsn_t*		scanned_lsn,	/*!< in/out: lsn of buffer start,
+					we return scanned lsn */
+	ulint*		scanned_checkpoint_no,
+					/*!< in/out: 4 lowest bytes of the
+					highest scanned checkpoint number so
+					far */
+	ulint*		n_bytes_scanned);/*!< out: how much we were able to
+					scan, smaller than buf_len if log
+					data ended here */
+#endif /* UNIV_HOTBACKUP */
+/*******************************************************************//**
+Returns TRUE if recovery is currently running.
+@return	recv_recovery_on */
+UNIV_INLINE
+ibool
+recv_recovery_is_on(void);
+/*=====================*/
+#ifdef UNIV_LOG_ARCHIVE
+/*******************************************************************//**
+Returns TRUE if recovery from backup is currently running.
+@return	recv_recovery_from_backup_on */
+UNIV_INLINE
+ibool
+recv_recovery_from_backup_is_on(void);
+/*=================================*/
+#endif /* UNIV_LOG_ARCHIVE */
+/************************************************************************//**
+Applies the hashed log records to the page, if the page lsn is less than the
+lsn of a log record. This can be called when a buffer page has just been
+read in, or also for a page already in the buffer pool. */
+UNIV_INTERN
+void
+recv_recover_page_func(
+/*===================*/
+#ifndef UNIV_HOTBACKUP
+	ibool		just_read_in,
+				/*!< in: TRUE if the i/o handler calls
+				this for a freshly read page */
+#endif /* !UNIV_HOTBACKUP */
+	buf_block_t*	block);	/*!< in/out: buffer block */
+#ifndef UNIV_HOTBACKUP
+/** Wrapper for recv_recover_page_func().
+Applies the hashed log records to the page, if the page lsn is less than the
+lsn of a log record. This can be called when a buffer page has just been
+read in, or also for a page already in the buffer pool.
+@param jri	in: TRUE if just read in (the i/o handler calls this for
+a freshly read page)
+@param block	in/out: the buffer block
+*/
+# define recv_recover_page(jri, block)	recv_recover_page_func(jri, block)
+#else /* !UNIV_HOTBACKUP */
+/** Wrapper for recv_recover_page_func().
+Applies the hashed log records to the page, if the page lsn is less than the
+lsn of a log record. This can be called when a buffer page has just been
+read in, or also for a page already in the buffer pool.
+@param jri	ignored: TRUE if just read in (the i/o handler calls this for
+a freshly read page); this variant does not pass it on
+@param block	in/out: the buffer block
+*/
+# define recv_recover_page(jri, block)	recv_recover_page_func(block)
+#endif /* !UNIV_HOTBACKUP */
+/********************************************************//**
+Recovers from a checkpoint. When this function returns, the database is able
+to start processing of new user transactions, but the function
+recv_recovery_from_checkpoint_finish should be called later to complete
+the recovery and free the resources used in it.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+recv_recovery_from_checkpoint_start_func(
+/*=====================================*/
+#ifdef UNIV_LOG_ARCHIVE
+	ulint		type,		/*!< in: LOG_CHECKPOINT or
+					LOG_ARCHIVE */
+	lsn_t		limit_lsn,	/*!< in: recover up to this lsn
+					if possible */
+#endif /* UNIV_LOG_ARCHIVE */
+	lsn_t		min_flushed_lsn,/*!< in: min flushed lsn from
+					data files */
+	lsn_t		max_flushed_lsn);/*!< in: max flushed lsn from
+					 data files */
+#ifdef UNIV_LOG_ARCHIVE
+/** Wrapper for recv_recovery_from_checkpoint_start_func().
+Recovers from a checkpoint. When this function returns, the database is able
+to start processing of new user transactions, but the function
+recv_recovery_from_checkpoint_finish should be called later to complete
+the recovery and free the resources used in it.
+@param type	in: LOG_CHECKPOINT or LOG_ARCHIVE
+@param lim	in: recover up to this log sequence number if possible
+@param min	in: minimum flushed log sequence number from data files
+@param max	in: maximum flushed log sequence number from data files
+@return	error code or DB_SUCCESS */
+# define recv_recovery_from_checkpoint_start(type,lim,min,max)		\
+	recv_recovery_from_checkpoint_start_func(type,lim,min,max)
+#else /* UNIV_LOG_ARCHIVE */
+/** Wrapper for recv_recovery_from_checkpoint_start_func().
+Recovers from a checkpoint. When this function returns, the database is able
+to start processing of new user transactions, but the function
+recv_recovery_from_checkpoint_finish should be called later to complete
+the recovery and free the resources used in it.
+@param type	ignored: LOG_CHECKPOINT or LOG_ARCHIVE
+@param lim	ignored: recover up to this log sequence number if possible
+@param min	in: minimum flushed log sequence number from data files
+@param max	in: maximum flushed log sequence number from data files
+@return	error code or DB_SUCCESS */
+# define recv_recovery_from_checkpoint_start(type,lim,min,max)		\
+	recv_recovery_from_checkpoint_start_func(min,max)
+#endif /* UNIV_LOG_ARCHIVE */
+/********************************************************//**
+Completes recovery from a checkpoint. */
+UNIV_INTERN
+void
+recv_recovery_from_checkpoint_finish(void);
+/*======================================*/
+/********************************************************//**
+Initiates the rollback of active transactions. */
+UNIV_INTERN
+void
+recv_recovery_rollback_active(void);
+/*===============================*/
+/*******************************************************//**
+Scans log from a buffer and stores new log data to the parsing buffer.
+Parses and hashes the log records if new data found.  Unless
+UNIV_HOTBACKUP is defined, this function will apply log records
+automatically when the hash table becomes full.
+@return TRUE if limit_lsn has been reached, or not able to scan any
+more in this log group */
+UNIV_INTERN
+ibool
+recv_scan_log_recs(
+/*===============*/
+	ulint		available_memory,/*!< in: we let the hash table of recs
+					to grow to this size, at the maximum */
+	ibool		store_to_hash,	/*!< in: TRUE if the records should be
+					stored to the hash table; this is set
+					to FALSE if just debug checking is
+					needed */
+	const byte*	buf,		/*!< in: buffer containing a log
+					segment or garbage */
+	ulint		len,		/*!< in: buffer length */
+	lsn_t		start_lsn,	/*!< in: buffer start lsn */
+	lsn_t*		contiguous_lsn,	/*!< in/out: it is known that all log
+					groups contain contiguous log data up
+					to this lsn */
+	lsn_t*		group_scanned_lsn);/*!< out: scanning succeeded up to
+					this lsn */
+/******************************************************//**
+Resets the logs. The contents of log files will be lost! */
+UNIV_INTERN
+void
+recv_reset_logs(
+/*============*/
+#ifdef UNIV_LOG_ARCHIVE
+	ulint		arch_log_no,	/*!< in: next archived log file number */
+	ibool		new_logs_created,/*!< in: TRUE if resetting logs
+					is done at the log creation;
+					FALSE if it is done after
+					archive recovery */
+#endif /* UNIV_LOG_ARCHIVE */
+	lsn_t		lsn);		/*!< in: reset to this lsn
+					rounded up to be divisible by
+					OS_FILE_LOG_BLOCK_SIZE, after
+					which we add
+					LOG_BLOCK_HDR_SIZE */
+#ifdef UNIV_HOTBACKUP
+/******************************************************//**
+Creates new log files after a backup has been restored. */
+UNIV_INTERN
+void
+recv_reset_log_files_for_backup(
+/*============================*/
+	const char*	log_dir,	/*!< in: log file directory path */
+	ulint		n_log_files,	/*!< in: number of log files */
+	lsn_t		log_file_size,	/*!< in: log file size */
+	lsn_t		lsn);		/*!< in: new start lsn, must be
+					divisible by OS_FILE_LOG_BLOCK_SIZE */
+#endif /* UNIV_HOTBACKUP */
+/********************************************************//**
+Creates the recovery system. */
+UNIV_INTERN
+void
+recv_sys_create(void);
+/*=================*/
+/**********************************************************//**
+Release recovery system mutexes. */
+UNIV_INTERN
+void
+recv_sys_close(void);
+/*================*/
+/********************************************************//**
+Frees the recovery system memory. */
+UNIV_INTERN
+void
+recv_sys_mem_free(void);
+/*===================*/
+/********************************************************//**
+Inits the recovery system for a recovery operation. */
+UNIV_INTERN
+void
+recv_sys_init(
+/*==========*/
+	ulint	available_memory);	/*!< in: available memory in bytes */
+#ifndef UNIV_HOTBACKUP
+/********************************************************//**
+Reset the state of the recovery system variables. */
+UNIV_INTERN
+void
+recv_sys_var_init(void);
+/*===================*/
+#endif /* !UNIV_HOTBACKUP */
+/*******************************************************************//**
+Empties the hash table of stored log records, applying them to appropriate
+pages. */
+UNIV_INTERN
+void
+recv_apply_hashed_log_recs(
+/*=======================*/
+	ibool	allow_ibuf);	/*!< in: if TRUE, also ibuf operations are
+				allowed during the application; if FALSE,
+				no ibuf operations are allowed, and after
+				the application all file pages are flushed to
+				disk and invalidated in buffer pool: this
+				alternative means that no new log records
+				can be generated during the application */
+#ifdef UNIV_HOTBACKUP
+/*******************************************************************//**
+Applies log records in the hash table to a backup. */
+UNIV_INTERN
+void
+recv_apply_log_recs_for_backup(void);
+/*================================*/
+#endif /* UNIV_HOTBACKUP */
+#ifdef UNIV_LOG_ARCHIVE
+/********************************************************//**
+Recovers from archived log files, and also from log files, if they exist.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+ulint
+recv_recovery_from_archive_start(
+/*=============================*/
+	lsn_t		min_flushed_lsn,/*!< in: min flushed lsn field from the
+					data files */
+	lsn_t		limit_lsn,	/*!< in: recover up to this lsn if
+					possible */
+	ulint		first_log_no);	/*!< in: number of the first archived
+					log file to use in the recovery; the
+					file will be searched from
+					INNOBASE_LOG_ARCH_DIR specified in
+					server config file */
+/********************************************************//**
+Completes recovery from archive. */
+UNIV_INTERN
+void
+recv_recovery_from_archive_finish(void);
+/*===================================*/
+#endif /* UNIV_LOG_ARCHIVE */
+
+/** Block of log record data */
+struct recv_data_t{
+	recv_data_t*	next;	/*!< pointer to the next block or NULL */
+				/* the log record data is stored physically
+				immediately after this struct, max amount
+				RECV_DATA_BLOCK_SIZE bytes of it */
+};
+
+/** Stored log record struct */
+struct recv_t{
+	byte		type;	/*!< log record type */
+	ulint		len;	/*!< log record body length in bytes */
+	recv_data_t*	data;	/*!< chain of blocks containing the log record
+				body */
+	lsn_t		start_lsn;/*!< start lsn of the log segment written by
+				the mtr which generated this log record: NOTE
+				that this is not necessarily the start lsn of
+				this log record */
+	lsn_t		end_lsn;/*!< end lsn of the log segment written by
+				the mtr which generated this log record: NOTE
+				that this is not necessarily the end lsn of
+				this log record */
+	UT_LIST_NODE_T(recv_t)
+			rec_list;/*!< node in the list of log records for
+				this page */
+};
+
+/** States of recv_addr_t (the type of recv_addr_t::state) */
+enum recv_addr_state {
+	/** not yet processed */
+	RECV_NOT_PROCESSED,
+	/** page is being read */
+	RECV_BEING_READ,
+	/** log records are being applied on the page */
+	RECV_BEING_PROCESSED,
+	/** log records have been applied on the page, or they have
+	been discarded because the tablespace does not exist */
+	RECV_PROCESSED
+};
+
+/** Hashed file address of a page, with the log records stored for it */
+struct recv_addr_t{
+	enum recv_addr_state state;
+				/*!< recovery state of the page */
+	unsigned	space:32;/*!< space id */
+	unsigned	page_no:32;/*!< page number */
+	UT_LIST_BASE_NODE_T(recv_t)
+			rec_list;/*!< base node of the list of log records for this page */
+	hash_node_t	addr_hash;/*!< hash node in the hash bucket chain */
+};
+
+struct recv_dblwr_t {
+	void add(byte* page);
+	/* add() and find_page() are only declared here; defined elsewhere */
+	byte* find_page(ulint space_id, ulint page_no);
+
+	std::list<byte *> pages; /*!< Pages from double write buffer */
+	/** Empties the pages list; the page buffers are not freed here */
+	void operator() () {
+		pages.clear();
+	}
+};
+
+/** Recovery system data structure */
+struct recv_sys_t{
+#ifndef UNIV_HOTBACKUP
+	ib_mutex_t		mutex;	/*!< mutex protecting the fields apply_log_recs,
+				n_addrs, and the state field in each recv_addr
+				struct */
+	ib_mutex_t		writer_mutex;/*!< mutex coordinating
+				flushing between recv_writer_thread and
+				the recovery thread. */
+#endif /* !UNIV_HOTBACKUP */
+	ibool		apply_log_recs;
+				/*!< this is TRUE when log rec application to
+				pages is allowed; this flag tells the
+				i/o-handler if it should do log record
+				application */
+	ibool		apply_batch_on;
+				/*!< this is TRUE when a log rec application
+				batch is running */
+	lsn_t		lsn;	/*!< log sequence number */
+	ulint		last_log_buf_size;
+				/*!< size of the log buffer when the database
+				last time wrote to the log */
+	byte*		last_block;
+				/*!< possible incomplete last recovered log
+				block */
+	byte*		last_block_buf_start;
+				/*!< the nonaligned start address of the
+				preceding buffer */
+	byte*		buf;	/*!< buffer for parsing log records */
+	ulint		len;	/*!< amount of data in buf */
+	lsn_t		parse_start_lsn;
+				/*!< this is the lsn from which we were able to
+				start parsing log records and adding them to
+				the hash table; zero if a suitable
+				start point not found yet */
+	lsn_t		scanned_lsn;
+				/*!< the log data has been scanned up to this
+				lsn */
+	ulint		scanned_checkpoint_no;
+				/*!< the log data has been scanned up to this
+				checkpoint number (lowest 4 bytes) */
+	ulint		recovered_offset;
+				/*!< start offset of non-parsed log records in
+				buf */
+	lsn_t		recovered_lsn;
+				/*!< the log records have been parsed up to
+				this lsn */
+	lsn_t		limit_lsn;/*!< recovery should be made at most
+				up to this lsn */
+	ibool		found_corrupt_log;
+				/*!< this is set to TRUE if we during log
+				scan find a corrupt log block, or a corrupt
+				log record, or there is a log parsing
+				buffer overflow */
+#ifdef UNIV_LOG_ARCHIVE
+	log_group_t*	archive_group;
+				/*!< in archive recovery: the log group whose
+				archive is read */
+#endif /* UNIV_LOG_ARCHIVE */
+	mem_heap_t*	heap;	/*!< memory heap of log records and file
+				addresses*/
+	hash_table_t*	addr_hash;/*!< hash table of file addresses of pages */
+	ulint		n_addrs;/*!< number of not processed hashed file
+				addresses in the hash table */
+
+	recv_dblwr_t	dblwr;	/*!< pages from the double write buffer */
+};
+
+/** The recovery system */
+extern recv_sys_t*	recv_sys;
+
+/** TRUE when applying redo log records during crash recovery; FALSE
+otherwise.  Note that this is FALSE while a background thread is
+rolling back incomplete transactions. */
+extern ibool		recv_recovery_on;
+/** If the following is TRUE, the buffer pool file pages must be invalidated
+after recovery and no ibuf operations are allowed; this becomes TRUE if
+the log record hash table becomes too full, and log records must be merged
+to file pages already before the recovery is finished: in this case no
+ibuf operations are allowed, as they could modify the pages read in the
+buffer pool before the pages have been recovered to the up-to-date state.
+
+TRUE means that recovery is running and no operations on the log files
+are allowed yet: the variable name is misleading. */
+extern ibool		recv_no_ibuf_operations;
+/** TRUE when recv_init_crash_recovery() has been called. */
+extern ibool		recv_needed_recovery;
+#ifdef UNIV_DEBUG
+/** TRUE if writing to the redo log (mtr_commit) is forbidden.
+Protected by log_sys->mutex. */
+extern ibool		recv_no_log_write;
+#endif /* UNIV_DEBUG */
+
+/** TRUE if buf_page_is_corrupted() should check if the log sequence
+number (FIL_PAGE_LSN) is in the future.  Initially FALSE, and set by
+recv_recovery_from_checkpoint_start_func(). */
+extern ibool		recv_lsn_checks_on;
+#ifdef UNIV_HOTBACKUP
+/** TRUE when the redo log is being backed up */
+extern ibool		recv_is_making_a_backup;
+#endif /* UNIV_HOTBACKUP */
+/** Maximum page number encountered in the redo log */
+extern ulint		recv_max_parsed_page_no;
+
+/** Size of the parsing buffer; it must accommodate RECV_SCAN_SIZE many
+times! */
+#define RECV_PARSING_BUF_SIZE	(2 * 1024 * 1024)
+
+/** Size of block reads when the log groups are scanned forward to do a
+roll-forward */
+#define RECV_SCAN_SIZE		(4 * UNIV_PAGE_SIZE)
+
+/** This many frames must be left free in the buffer pool when we scan
+the log and store the scanned log records in the buffer pool: we will
+use these free frames to read in pages when we start applying the
+log records to the database. */
+extern ulint	recv_n_pool_free_frames;
+
+#ifndef UNIV_NONINL
+#include "log0recv.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/log0recv.ic b/storage/innobase/include/log0recv.ic
new file mode 100644
index 00000000000..32c28dd03e6
--- /dev/null
+++ b/storage/innobase/include/log0recv.ic
@@ -0,0 +1,53 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/log0recv.ic
+Recovery
+
+Created 9/20/1997 Heikki Tuuri
+*******************************************************/
+
+#include "univ.i"
+
+/*******************************************************************//**
+Returns TRUE if recovery is currently running (the recv_recovery_on flag).
+@return	recv_recovery_on */
+UNIV_INLINE
+ibool
+recv_recovery_is_on(void)
+/*=====================*/
+{
+	return(recv_recovery_on);
+}
+
+#ifdef UNIV_LOG_ARCHIVE
+/** TRUE when applying redo log records from an archived log file */
+extern ibool	recv_recovery_from_backup_on;
+
+/*******************************************************************//**
+Returns TRUE while redo log records from an archived log file are applied.
+@return	recv_recovery_from_backup_on */
+UNIV_INLINE
+ibool
+recv_recovery_from_backup_is_on(void)
+/*=================================*/
+{
+	return(recv_recovery_from_backup_on);
+}
+#endif /* UNIV_LOG_ARCHIVE */
diff --git a/storage/innobase/include/mach0data.h b/storage/innobase/include/mach0data.h
new file mode 100644
index 00000000000..d0087f56aaa
--- /dev/null
+++ b/storage/innobase/include/mach0data.h
@@ -0,0 +1,418 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/mach0data.h
+Utilities for converting data from the database file
+to the machine format.
+
+Created 11/28/1995 Heikki Tuuri
+***********************************************************************/
+
+#ifndef mach0data_h
+#define mach0data_h
+
+#ifndef UNIV_INNOCHECKSUM
+
+#include "univ.i"
+#include "ut0byte.h"
+
+/* The data and all fields are always stored in a database file
+in the same format: ASCII, big-endian, and so on.
+All data in the files MUST be accessed using the functions in this
+module. */
+
+/*******************************************************//**
+The following function is used to store data in one byte. */
+UNIV_INLINE
+void
+mach_write_to_1(
+/*============*/
+	byte*	b,	/*!< in: pointer to byte where to store */
+	ulint	n);	 /*!< in: ulint integer to be stored, >= 0, < 256 */
+/********************************************************//**
+The following function is used to fetch data from one byte.
+@return	ulint integer, >= 0, < 256 */
+UNIV_INLINE
+ulint
+mach_read_from_1(
+/*=============*/
+	const byte*	b)	/*!< in: pointer to byte */
+	__attribute__((nonnull, pure));
+/*******************************************************//**
+The following function is used to store data in two consecutive
+bytes. We store the most significant byte to the lower address. */
+UNIV_INLINE
+void
+mach_write_to_2(
+/*============*/
+	byte*	b,	/*!< in: pointer to two bytes where to store */
+	ulint	n);	 /*!< in: ulint integer to be stored, >= 0, < 64k */
+/********************************************************//**
+The following function is used to fetch data from two consecutive
+bytes. The most significant byte is at the lowest address.
+@return	ulint integer, >= 0, < 64k */
+UNIV_INLINE
+ulint
+mach_read_from_2(
+/*=============*/
+	const byte*	b)	/*!< in: pointer to two bytes */
+	__attribute__((nonnull, pure));
+
+/********************************************************//**
+The following function is used to convert a 16-bit data item
+to the canonical format, for fast bytewise equality test
+against memory.
+@return	16-bit integer in canonical format */
+UNIV_INLINE
+uint16
+mach_encode_2(
+/*==========*/
+	ulint	n)	/*!< in: integer in machine-dependent format */
+	__attribute__((const));
+/********************************************************//**
+The following function is used to convert a 16-bit data item
+from the canonical format, for fast bytewise equality test
+against memory.
+@return	integer in machine-dependent format */
+UNIV_INLINE
+ulint
+mach_decode_2(
+/*==========*/
+	uint16	n)	/*!< in: 16-bit integer in canonical format */
+	__attribute__((const));
+/*******************************************************//**
+The following function is used to store data in 3 consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_3(
+/*============*/
+	byte*	b,	/*!< in: pointer to 3 bytes where to store */
+	ulint	n);	 /*!< in: ulint integer to be stored */
+/********************************************************//**
+The following function is used to fetch data from 3 consecutive
+bytes. The most significant byte is at the lowest address.
+@return	ulint integer */
+UNIV_INLINE
+ulint
+mach_read_from_3(
+/*=============*/
+	const byte*	b)	/*!< in: pointer to 3 bytes */
+	__attribute__((nonnull, pure));
+/*******************************************************//**
+The following function is used to store data in four consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_4(
+/*============*/
+	byte*	b,	/*!< in: pointer to four bytes where to store */
+	ulint	n);	 /*!< in: ulint integer to be stored */
+/********************************************************//**
+The following function is used to fetch data from 4 consecutive
+bytes. The most significant byte is at the lowest address.
+@return	ulint integer */
+UNIV_INLINE
+ulint
+mach_read_from_4(
+/*=============*/
+	const byte*	b)	/*!< in: pointer to four bytes */
+	__attribute__((nonnull, pure));
+/*********************************************************//**
+Writes a ulint in a compressed form (1..5 bytes).
+@return	stored size in bytes */
+UNIV_INLINE
+ulint
+mach_write_compressed(
+/*==================*/
+	byte*	b,	/*!< in: pointer to memory where to store */
+	ulint	n);	/*!< in: ulint integer to be stored */
+/*********************************************************//**
+Returns the size of an ulint when written in the compressed form.
+@return	compressed size in bytes */
+UNIV_INLINE
+ulint
+mach_get_compressed_size(
+/*=====================*/
+	ulint	n)	/*!< in: ulint integer to be stored */
+	__attribute__((const));
+/*********************************************************//**
+Reads a ulint in a compressed form.
+@return	read integer */
+UNIV_INLINE
+ulint
+mach_read_compressed(
+/*=================*/
+	const byte*	b)	/*!< in: pointer to memory from where to read */
+	__attribute__((nonnull, pure));
+/*******************************************************//**
+The following function is used to store data in 6 consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_6(
+/*============*/
+	byte*		b,	/*!< in: pointer to 6 bytes where to store */
+	ib_uint64_t	id);	/*!< in: 48-bit integer */
+/********************************************************//**
+The following function is used to fetch data from 6 consecutive
+bytes. The most significant byte is at the lowest address.
+@return	48-bit integer */
+UNIV_INLINE
+ib_uint64_t
+mach_read_from_6(
+/*=============*/
+	const byte*	b)	/*!< in: pointer to 6 bytes */
+	__attribute__((nonnull, pure));
+/*******************************************************//**
+The following function is used to store data in 7 consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_7(
+/*============*/
+	byte*		b,	/*!< in: pointer to 7 bytes where to store */
+	ib_uint64_t	n);	/*!< in: 56-bit integer */
+/********************************************************//**
+The following function is used to fetch data from 7 consecutive
+bytes. The most significant byte is at the lowest address.
+@return	56-bit integer */
+UNIV_INLINE
+ib_uint64_t
+mach_read_from_7(
+/*=============*/
+	const byte*	b)	/*!< in: pointer to 7 bytes */
+	__attribute__((nonnull, pure));
+/*******************************************************//**
+The following function is used to store data in 8 consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_8(
+/*============*/
+	void*		b,	/*!< in: pointer to 8 bytes where to store */
+	ib_uint64_t	n);	/*!< in: 64-bit integer to be stored */
+/********************************************************//**
+The following function is used to fetch data from 8 consecutive
+bytes. The most significant byte is at the lowest address.
+@return	64-bit integer */
+UNIV_INLINE
+ib_uint64_t
+mach_read_from_8(
+/*=============*/
+	const byte*	b)	/*!< in: pointer to 8 bytes */
+	__attribute__((nonnull, pure));
+/*********************************************************//**
+Writes a 64-bit integer in a compressed form (5..9 bytes).
+@return	size in bytes */
+UNIV_INLINE
+ulint
+mach_ull_write_compressed(
+/*======================*/
+	byte*		b,	/*!< in: pointer to memory where to store */
+	ib_uint64_t	n);	/*!< in: 64-bit integer to be stored */
+/*********************************************************//**
+Returns the size of a 64-bit integer when written in the compressed form.
+@return	compressed size in bytes */
+UNIV_INLINE
+ulint
+mach_ull_get_compressed_size(
+/*=========================*/
+	ib_uint64_t	n);	/*!< in: 64-bit integer to be stored */
+/*********************************************************//**
+Reads a 64-bit integer in a compressed form.
+@return	the value read */
+UNIV_INLINE
+ib_uint64_t
+mach_ull_read_compressed(
+/*=====================*/
+	const byte*	b)	/*!< in: pointer to memory from where to read */
+	__attribute__((nonnull, pure));
+/*********************************************************//**
+Writes a 64-bit integer in a compressed form (1..11 bytes).
+@return	size in bytes */
+UNIV_INLINE
+ulint
+mach_ull_write_much_compressed(
+/*===========================*/
+	byte*		b,	/*!< in: pointer to memory where to store */
+	ib_uint64_t	n);	/*!< in: 64-bit integer to be stored */
+/*********************************************************//**
+Returns the size of a 64-bit integer when written in the compressed form.
+@return	compressed size in bytes */
+UNIV_INLINE
+ulint
+mach_ull_get_much_compressed_size(
+/*==============================*/
+	ib_uint64_t	n)	/*!< in: 64-bit integer to be stored */
+	__attribute__((const));
+/*********************************************************//**
+Reads a 64-bit integer in a compressed form.
+@return	the value read */
+UNIV_INLINE
+ib_uint64_t
+mach_ull_read_much_compressed(
+/*==========================*/
+	const byte*	b)	/*!< in: pointer to memory from where to read */
+	__attribute__((nonnull, pure));
+/*********************************************************//**
+Reads a ulint in a compressed form if the log record fully contains it.
+@return	pointer to end of the stored field, NULL if not complete */
+UNIV_INTERN
+byte*
+mach_parse_compressed(
+/*==================*/
+	byte*	ptr,	/*!< in: pointer to buffer from where to read */
+	byte*	end_ptr,/*!< in: pointer to end of the buffer */
+	ulint*	val);	/*!< out: read value */
+/*********************************************************//**
+Reads a 64-bit integer in a compressed form
+if the log record fully contains it.
+@return pointer to end of the stored field, NULL if not complete */
+UNIV_INLINE
+byte*
+mach_ull_parse_compressed(
+/*======================*/
+	byte*		ptr,	/*!< in: pointer to buffer from where to read */
+	byte*		end_ptr,/*!< in: pointer to end of the buffer */
+	ib_uint64_t*	val);	/*!< out: read value */
+#ifndef UNIV_HOTBACKUP
+/*********************************************************//**
+Reads a double. It is stored in a little-endian format.
+@return	double read */
+UNIV_INLINE
+double
+mach_double_read(
+/*=============*/
+	const byte*	b)	/*!< in: pointer to memory from where to read */
+	__attribute__((nonnull, pure));
+/*********************************************************//**
+Writes a double. It is stored in a little-endian format. */
+UNIV_INLINE
+void
+mach_double_write(
+/*==============*/
+	byte*	b,	/*!< in: pointer to memory where to write */
+	double	d);	/*!< in: double */
+/*********************************************************//**
+Reads a float. It is stored in a little-endian format.
+@return	float read */
+UNIV_INLINE
+float
+mach_float_read(
+/*============*/
+	const byte*	b)	/*!< in: pointer to memory from where to read */
+	__attribute__((nonnull, pure));
+/*********************************************************//**
+Writes a float. It is stored in a little-endian format. */
+UNIV_INLINE
+void
+mach_float_write(
+/*=============*/
+	byte*	b,	/*!< in: pointer to memory where to write */
+	float	d);	/*!< in: float */
+/*********************************************************//**
+Reads a ulint stored in the little-endian format.
+@return	unsigned long int */
+UNIV_INLINE
+ulint
+mach_read_from_n_little_endian(
+/*===========================*/
+	const byte*	buf,		/*!< in: from where to read */
+	ulint		buf_size)	/*!< in: from how many bytes to read */
+	__attribute__((nonnull, pure));
+/*********************************************************//**
+Writes a ulint in the little-endian format. */
+UNIV_INLINE
+void
+mach_write_to_n_little_endian(
+/*==========================*/
+	byte*	dest,		/*!< in: where to write */
+	ulint	dest_size,	/*!< in: into how many bytes to write */
+	ulint	n);		/*!< in: unsigned long int to write */
+/*********************************************************//**
+Reads a ulint stored in the little-endian format.
+@return	unsigned long int */
+UNIV_INLINE
+ulint
+mach_read_from_2_little_endian(
+/*===========================*/
+	const byte*	buf)		/*!< in: from where to read */
+	__attribute__((nonnull, pure));
+/*********************************************************//**
+Writes a ulint in the little-endian format. */
+UNIV_INLINE
+void
+mach_write_to_2_little_endian(
+/*==========================*/
+	byte*	dest,		/*!< in: where to write */
+	ulint	n);		/*!< in: unsigned long int to write */
+/*********************************************************//**
+Convert integral type from storage byte order (big endian) to
+host byte order.
+@return	integer value */
+UNIV_INLINE
+ib_uint64_t
+mach_read_int_type(
+/*===============*/
+	const byte*	src,		/*!< in: where to read from */
+	ulint		len,		/*!< in: length of src */
+	ibool		unsigned_type);	/*!< in: signed or unsigned flag */
+/***********************************************************//**
+Convert integral type from host byte order to (big-endian) storage
+byte order. */
+UNIV_INLINE
+void
+mach_write_int_type(
+/*================*/
+	byte*		dest,		/*!< in: where to write*/
+	const byte*	src,		/*!< in: where to read from */
+	ulint		len,		/*!< in: length of src */
+	bool		usign);		/*!< in: signed or unsigned flag */
+
+/*************************************************************//**
+Convert a ulonglong integer from host byte order to (big-endian)
+storage byte order. */
+UNIV_INLINE
+void
+mach_write_ulonglong(
+/*=================*/
+	byte*		dest,		/*!< in: where to write */
+	ulonglong	src,		/*!< in: value to convert and write */
+	ulint		len,		/*!< in: length of dest */
+	bool		usign);		/*!< in: signed or unsigned flag */
+
+/********************************************************//**
+Reads 1 - 4 bytes from a file page buffered in the buffer pool.
+@return	value read */
+UNIV_INLINE
+ulint
+mach_read_ulint(
+/*============*/
+	const byte*	ptr,	/*!< in: pointer from where to read */
+	ulint		type);	/*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+
+#endif /* !UNIV_HOTBACKUP */
+#endif /* !UNIV_INNOCHECKSUM */
+
+#ifndef UNIV_NONINL
+#include "mach0data.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/mach0data.ic b/storage/innobase/include/mach0data.ic
new file mode 100644
index 00000000000..7449d2da2b8
--- /dev/null
+++ b/storage/innobase/include/mach0data.ic
@@ -0,0 +1,881 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/mach0data.ic
+Utilities for converting data from the database file
+to the machine format.
+
+Created 11/28/1995 Heikki Tuuri
+***********************************************************************/
+
+#ifndef UNIV_INNOCHECKSUM
+
+#include "ut0mem.h"
+
+/*******************************************************//**
+Stores an integer that fits in 8 bits into a single byte. */
+UNIV_INLINE
+void
+mach_write_to_1(
+/*============*/
+	byte*	b,	/*!< in: pointer to byte where to store */
+	ulint	n)	/*!< in: ulint integer to be stored, >= 0, < 256 */
+{
+	ut_ad(b);
+	/* The value must be representable in one byte. */
+	ut_ad(n <= 0xFFUL);
+	*b = static_cast<byte>(n);
+}
+
+/********************************************************//**
+Fetches the unsigned value held in a single byte.
+@return	ulint integer, >= 0, < 256 */
+UNIV_INLINE
+ulint
+mach_read_from_1(
+/*=============*/
+	const byte*	b)	/*!< in: pointer to byte */
+{
+	ut_ad(b);
+	return(static_cast<ulint>(*b));
+}
+
+/*******************************************************//**
+Stores a 16-bit value in two consecutive bytes in big-endian order:
+the most significant byte goes to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_2(
+/*============*/
+	byte*	b,	/*!< in: pointer to two bytes where to store */
+	ulint	n)	/*!< in: ulint integer to be stored */
+{
+	ut_ad(b);
+	ut_ad(n <= 0xFFFFUL);
+
+	*b = static_cast<byte>(n >> 8);		/* high-order byte */
+	*(b + 1) = static_cast<byte>(n);	/* low-order byte */
+}
+
+/********************************************************//**
+Fetches a 16-bit value from two consecutive bytes stored in
+big-endian order (most significant byte at the lowest address).
+@return	ulint integer */
+UNIV_INLINE
+ulint
+mach_read_from_2(
+/*=============*/
+	const byte*	b)	/*!< in: pointer to 2 bytes */
+{
+	return((static_cast<ulint>(b[0]) << 8) | static_cast<ulint>(b[1]));
+}
+
+/********************************************************//**
+Converts a 16-bit data item to the canonical (stored, big-endian)
+format, so that it can be compared bytewise against memory
+written by mach_write_to_2().
+@return	16-bit integer in canonical format */
+UNIV_INLINE
+uint16
+mach_encode_2(
+/*==========*/
+	ulint	n)	/*!< in: integer in machine-dependent format */
+{
+	uint16	canonical;
+	ut_ad(sizeof(canonical) == 2);
+	mach_write_to_2(reinterpret_cast<byte*>(&canonical), n);
+	return(canonical);
+}
+/********************************************************//**
+Converts a 16-bit data item from the canonical (stored, big-endian)
+format back to the machine format; the inverse of mach_encode_2().
+Reads the two bytes of n with mach_read_from_2().
+@return	integer in machine-dependent format */
+UNIV_INLINE
+ulint
+mach_decode_2(
+/*==========*/
+	uint16	n)	/*!< in: 16-bit integer in canonical format */
+{
+	ut_ad(sizeof(n) == 2);
+	return(mach_read_from_2(reinterpret_cast<const byte*>(&n)));
+}
+
+/*******************************************************//**
+Stores a 24-bit value in three consecutive bytes in big-endian order:
+the most significant byte goes to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_3(
+/*============*/
+	byte*	b,	/*!< in: pointer to 3 bytes where to store */
+	ulint	n)	/*!< in: ulint integer to be stored */
+{
+	ut_ad(b);
+	ut_ad(n <= 0xFFFFFFUL);
+
+	*b = static_cast<byte>(n >> 16);
+	*(b + 1) = static_cast<byte>(n >> 8);
+	*(b + 2) = static_cast<byte>(n);
+}
+
+/********************************************************//**
+Fetches a 24-bit value from three consecutive bytes stored in
+big-endian order (most significant byte at the lowest address).
+@return	ulint integer */
+UNIV_INLINE
+ulint
+mach_read_from_3(
+/*=============*/
+	const byte*	b)	/*!< in: pointer to 3 bytes */
+{
+	ut_ad(b);
+
+	const ulint	hi = static_cast<ulint>(b[0]) << 16;
+	const ulint	mid = static_cast<ulint>(b[1]) << 8;
+	return(hi | mid | static_cast<ulint>(b[2]));
+}
+
+/*******************************************************//**
+Stores the low 32 bits of n in four consecutive bytes in big-endian
+order: the most significant byte goes to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_4(
+/*============*/
+	byte*	b,	/*!< in: pointer to four bytes where to store */
+	ulint	n)	/*!< in: ulint integer to be stored */
+{
+	ut_ad(b);
+
+	*b = static_cast<byte>(n >> 24);
+	*(b + 1) = static_cast<byte>(n >> 16);
+	*(b + 2) = static_cast<byte>(n >> 8);
+	*(b + 3) = static_cast<byte>(n);
+}
+
+#endif /* !UNIV_INNOCHECKSUM */
+
+/********************************************************//**
+Fetches a 32-bit value from four consecutive bytes stored in
+big-endian order (most significant byte at the lowest address).
+@return	ulint integer */
+UNIV_INLINE
+ulint
+mach_read_from_4(
+/*=============*/
+	const byte*	b)	/*!< in: pointer to four bytes */
+{
+	ut_ad(b);
+
+	ulint	val = static_cast<ulint>(b[0]);
+	val = (val << 8) | static_cast<ulint>(b[1]);
+	val = (val << 8) | static_cast<ulint>(b[2]);
+	return((val << 8) | static_cast<ulint>(b[3]));
+}
+
+#ifndef UNIV_INNOCHECKSUM
+
+/*********************************************************//**
+Writes a ulint in a compressed form of 1 to 5 bytes. The leading bits
+of the first byte encode the length: 0 = 1 byte, 10 = 2 bytes,
+110 = 3 bytes, 1110 = 4 bytes; otherwise the first byte is 0xF0 and
+the value follows in the next 4 bytes.
+@return	compressed size in bytes */
+UNIV_INLINE
+ulint
+mach_write_compressed(
+/*==================*/
+	byte*	b,	/*!< in: pointer to memory where to store */
+	ulint	n)	/*!< in: ulint integer (< 2^32) to be stored */
+{
+	ut_ad(b);
+
+	if (n < 0x80UL) {
+		mach_write_to_1(b, n);
+		return(1);
+	}
+	if (n < 0x4000UL) {
+		mach_write_to_2(b, n | 0x8000UL);
+		return(2);
+	}
+	if (n < 0x200000UL) {
+		mach_write_to_3(b, n | 0xC00000UL);
+		return(3);
+	}
+	if (n < 0x10000000UL) {
+		mach_write_to_4(b, n | 0xE0000000UL);
+		return(4);
+	}
+	mach_write_to_1(b, 0xF0UL);
+	mach_write_to_4(b + 1, n);
+	return(5);
+}
+
+/*********************************************************//**
+Computes how many bytes mach_write_compressed() uses for a ulint.
+@return	compressed size in bytes */
+UNIV_INLINE
+ulint
+mach_get_compressed_size(
+/*=====================*/
+	ulint	n)	/*!< in: ulint integer (< 2^32) to be stored */
+{
+	ulint	size;
+
+	if (n >= 0x10000000UL) {
+		size = 5;
+	} else if (n >= 0x200000UL) {
+		size = 4;
+	} else {
+		/* 1 byte below 2^7, 2 bytes below 2^14, else 3 bytes */
+		size = (n < 0x80UL) ? 1 : ((n < 0x4000UL) ? 2 : 3);
+	}
+	return(size);
+}
+
+/*********************************************************//**
+Reads a ulint written by mach_write_compressed().
+@return	read integer (< 2^32) */
+UNIV_INLINE
+ulint
+mach_read_compressed(
+/*=================*/
+	const byte*	b)	/*!< in: pointer to memory from where to read */
+{
+	ut_ad(b);
+
+	const ulint	first = mach_read_from_1(b);
+
+	if (first < 0x80UL) {
+		return(first);
+	}
+	if (first < 0xC0UL) {
+		return(mach_read_from_2(b) & 0x7FFFUL);
+	}
+	if (first < 0xE0UL) {
+		return(mach_read_from_3(b) & 0x3FFFFFUL);
+	}
+	if (first < 0xF0UL) {
+		return(mach_read_from_4(b) & 0x1FFFFFFFUL);
+	}
+	ut_ad(first == 0xF0UL);
+	return(mach_read_from_4(b + 1));
+}
+
+/*******************************************************//**
+Stores a 64-bit value in 8 consecutive bytes in big-endian order:
+the most significant byte goes to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_8(
+/*============*/
+	void*		b,	/*!< in: pointer to 8 bytes where to store */
+	ib_uint64_t	n)	/*!< in: 64-bit integer to be stored */
+{
+	byte*	dest = static_cast<byte*>(b);
+	ut_ad(b);
+	mach_write_to_4(dest, (ulint) (n >> 32));
+	mach_write_to_4(dest + 4, (ulint) n);
+}
+
+/********************************************************//**
+Fetches a 64-bit value from 8 consecutive bytes stored in big-endian
+order, assembling it from two 32-bit halves read by mach_read_from_4().
+@return	64-bit integer */
+UNIV_INLINE
+ib_uint64_t
+mach_read_from_8(
+/*=============*/
+	const byte*	b)	/*!< in: pointer to 8 bytes */
+{
+	ib_uint64_t	high;
+	ib_uint64_t	low;
+
+	high = mach_read_from_4(b);
+	low = mach_read_from_4(b + 4);
+	return((high << 32) | low);
+}
+
+/*******************************************************//**
+Stores a 56-bit value in 7 consecutive bytes in big-endian order: the
+upper 3 bytes via mach_write_to_3(), the lower 4 via mach_write_to_4(). */
+UNIV_INLINE
+void
+mach_write_to_7(
+/*============*/
+	byte*		b,	/*!< in: pointer to 7 bytes where to store */
+	ib_uint64_t	n)	/*!< in: 56-bit integer */
+{
+	ut_ad(b);
+
+	mach_write_to_3(b, static_cast<ulint>(n >> 32));
+	mach_write_to_4(b + 3, static_cast<ulint>(n));
+}
+
+/********************************************************//**
+Fetches a 56-bit value from 7 consecutive big-endian bytes.
+@return	56-bit integer */
+UNIV_INLINE
+ib_uint64_t
+mach_read_from_7(
+/*=============*/
+	const byte*	b)	/*!< in: pointer to 7 bytes */
+{
+	ut_ad(b);
+	const ulint	high = mach_read_from_3(b);
+	const ulint	low = mach_read_from_4(b + 3);
+	return(ut_ull_create(high, low));
+}
+
+/*******************************************************//**
+Stores a 48-bit value in 6 consecutive bytes in big-endian order: the
+upper 2 bytes via mach_write_to_2(), the lower 4 via mach_write_to_4(). */
+UNIV_INLINE
+void
+mach_write_to_6(
+/*============*/
+	byte*		b,	/*!< in: pointer to 6 bytes where to store */
+	ib_uint64_t	n)	/*!< in: 48-bit integer */
+{
+	ut_ad(b);
+
+	mach_write_to_2(b, static_cast<ulint>(n >> 32));
+	mach_write_to_4(b + 2, static_cast<ulint>(n));
+}
+
+/********************************************************//**
+Fetches a 48-bit value from 6 consecutive big-endian bytes.
+@return	48-bit integer */
+UNIV_INLINE
+ib_uint64_t
+mach_read_from_6(
+/*=============*/
+	const byte*	b)	/*!< in: pointer to 6 bytes */
+{
+	ut_ad(b);
+	const ulint	high = mach_read_from_2(b);
+	const ulint	low = mach_read_from_4(b + 2);
+	return(ut_ull_create(high, low));
+}
+
+/*********************************************************//**
+Writes a 64-bit integer in a compressed form (5..9 bytes).
+@return	size in bytes */
+UNIV_INLINE
+ulint
+mach_ull_write_compressed(
+/*======================*/
+	byte*		b,	/*!< in: pointer to memory where to store */
+	ib_uint64_t	n)	/*!< in: 64-bit integer to be stored */
+{
+	ut_ad(b);
+
+	/* The high 32 bits are compressed (1..5 bytes); the low
+	32 bits always follow as 4 plain big-endian bytes. */
+	const ulint	high_len = mach_write_compressed(
+		b, (ulint) (n >> 32));
+	mach_write_to_4(b + high_len, (ulint) n);
+	return(high_len + 4);
+}
+
+/*********************************************************//**
+Computes the size that mach_ull_write_compressed() needs for n.
+@return	compressed size in bytes */
+UNIV_INLINE
+ulint
+mach_ull_get_compressed_size(
+/*=========================*/
+	ib_uint64_t	n)	/*!< in: 64-bit integer to be stored */
+{
+	return(mach_get_compressed_size((ulint) (n >> 32)) + 4);
+}
+
+/*********************************************************//**
+Reads a 64-bit integer written by mach_ull_write_compressed().
+@return	the value read */
+UNIV_INLINE
+ib_uint64_t
+mach_ull_read_compressed(
+/*=====================*/
+	const byte*	b)	/*!< in: pointer to memory from where to read */
+{
+	ut_ad(b);
+
+	/* Decode the compressed high half; its encoded length tells
+	where the plain 4-byte low half starts. */
+	const ulint	high = mach_read_compressed(b);
+	const ulint	high_len = mach_get_compressed_size(high);
+	const ulint	low = mach_read_from_4(b + high_len);
+
+	ib_uint64_t	val = static_cast<ib_uint64_t>(high);
+
+	val <<= 32;
+	val |= static_cast<ib_uint64_t>(low);
+	return(val);
+}
+
+/*********************************************************//**
+Writes a 64-bit integer in a compressed form (1..11 bytes). A value
+that fits in 32 bits is written as by mach_write_compressed(); otherwise
+a 0xFF marker byte is followed by the compressed high and low halves.
+@return	size in bytes */
+UNIV_INLINE
+ulint
+mach_ull_write_much_compressed(
+/*===========================*/
+	byte*		b,	/*!< in: pointer to memory where to store */
+	ib_uint64_t	n)	/*!< in: 64-bit integer to be stored */
+{
+	ut_ad(b);
+
+	const ulint	high = (ulint) (n >> 32);
+	const ulint	low = (ulint) n & ULINT32_MASK;
+
+	if (high == 0) {
+		return(mach_write_compressed(b, low));
+	}
+
+	*b = (byte) 0xFF;
+	const ulint	high_len = mach_write_compressed(b + 1, high);
+	return(1 + high_len + mach_write_compressed(b + 1 + high_len, low));
+}
+
+/*********************************************************//**
+Computes the size that mach_ull_write_much_compressed() needs for n.
+@return	compressed size in bytes */
+UNIV_INLINE
+ulint
+mach_ull_get_much_compressed_size(
+/*==============================*/
+	ib_uint64_t	n)	/*!< in: 64-bit integer to be stored */
+{
+	const ulint	high = (ulint) (n >> 32);
+	const ulint	low = (ulint) n & ULINT32_MASK;
+	return(high == 0
+	       ? mach_get_compressed_size(low)
+	       : 1 + mach_get_compressed_size(high)
+	       + mach_get_compressed_size(low));
+}
+
+/*********************************************************//**
+Reads a 64-bit integer in the form written by
+mach_ull_write_much_compressed(): either a plain compressed ulint, or
+a 0xFF marker byte followed by the compressed high and low 32-bit halves.
+@return	the value read */
+UNIV_INLINE
+ib_uint64_t
+mach_ull_read_much_compressed(
+/*==========================*/
+	const byte*	b)	/*!< in: pointer to memory from where to read */
+{
+	ut_ad(b);
+
+	if (*b != (byte) 0xFF) {
+		/* No marker byte: the value fits in 32 bits. */
+		return(mach_read_compressed(b));
+	}
+
+	/* Skip the marker, decode the high half, then locate the
+	low half right after it. */
+	const ulint	high = mach_read_compressed(b + 1);
+	const byte*	low_ptr = b + 1 + mach_get_compressed_size(high);
+	ib_uint64_t	val = static_cast<ib_uint64_t>(high) << 32;
+
+	val |= mach_read_compressed(low_ptr);
+
+	return(val);
+}
+
+/*********************************************************//**
+Reads a 64-bit integer in a compressed form
+if the log record fully contains it.
+@return pointer to end of the stored field, NULL if not complete */
+UNIV_INLINE
+byte*
+mach_ull_parse_compressed(
+/*======================*/
+	byte*		ptr,	/*!< in: pointer to buffer from where to read */
+	byte*		end_ptr,/*!< in: pointer to end of the buffer */
+	ib_uint64_t*	val)	/*!< out: read value */
+{
+	ut_ad(ptr);
+	ut_ad(end_ptr);
+	ut_ad(val);
+
+	/* Even the shortest encoding takes 1 + 4 bytes. */
+	if (end_ptr < ptr + 5) {
+		return(NULL);
+	}
+
+	/* Compressed high half. It is stored to *val right away, so
+	*val is modified even if the low half turns out to be missing. */
+	const ulint	high = mach_read_compressed(ptr);
+
+	*val = high;
+
+	byte*	low_ptr = ptr + mach_get_compressed_size(high);
+
+	if (end_ptr < low_ptr + 4) {
+		return(NULL);
+	}
+
+	/* Plain big-endian low half. */
+	*val = (static_cast<ib_uint64_t>(high) << 32)
+		| mach_read_from_4(low_ptr);
+
+	return(low_ptr + 4);
+}
+#ifndef UNIV_HOTBACKUP
+/*********************************************************//**
+Reads a double. It is stored in a little-endian format.
+@return	double read */
+UNIV_INLINE
+double
+mach_double_read(
+/*=============*/
+	const byte*	b)	/*!< in: pointer to memory from where to read */
+{
+	double	val;
+	byte*	out;
+
+	out = reinterpret_cast<byte*>(&val);
+	/* The stored image is little-endian: copy it as is on a
+	little-endian host, reversed on a big-endian one. */
+	for (ulint pos = 0; pos < sizeof(val); pos++) {
+#ifdef WORDS_BIGENDIAN
+		out[sizeof(val) - 1 - pos] = b[pos];
+#else
+		out[pos] = b[pos];
+#endif
+	}
+
+	return(val);
+}
+
+/*********************************************************//**
+Writes a double. It is stored in a little-endian format. */
+UNIV_INLINE
+void
+mach_double_write(
+/*==============*/
+	byte*	b,	/*!< in: pointer to memory where to write */
+	double	d)	/*!< in: double */
+{
+	const byte*	in;
+
+	in = reinterpret_cast<const byte*>(&d);
+	/* Emit the bytes of d in little-endian order, reversing
+	the host representation on big-endian machines. */
+	for (ulint pos = 0; pos < sizeof(d); pos++) {
+#ifdef WORDS_BIGENDIAN
+		b[pos] = in[sizeof(d) - 1 - pos];
+#else
+		b[pos] = in[pos];
+#endif
+	}
+}
+
+/*********************************************************//**
+Reads a float. It is stored in a little-endian format.
+@return	float read */
+UNIV_INLINE
+float
+mach_float_read(
+/*============*/
+	const byte*	b)	/*!< in: pointer to memory from where to read */
+{
+	float	val;
+	byte*	out;
+
+	out = reinterpret_cast<byte*>(&val);
+	/* The stored image is little-endian: copy it as is on a
+	little-endian host, reversed on a big-endian one. */
+	for (ulint pos = 0; pos < sizeof(val); pos++) {
+#ifdef WORDS_BIGENDIAN
+		out[sizeof(val) - 1 - pos] = b[pos];
+#else
+		out[pos] = b[pos];
+#endif
+	}
+
+	return(val);
+}
+
+/*********************************************************//**
+Writes a float. It is stored in a little-endian format. */
+UNIV_INLINE
+void
+mach_float_write(
+/*=============*/
+	byte*	b,	/*!< in: pointer to memory where to write */
+	float	d)	/*!< in: float */
+{
+	const byte*	in;
+
+	in = reinterpret_cast<const byte*>(&d);
+	/* Emit the bytes of d in little-endian order, reversing
+	the host representation on big-endian machines. */
+	for (ulint pos = 0; pos < sizeof(d); pos++) {
+#ifdef WORDS_BIGENDIAN
+		b[pos] = in[sizeof(d) - 1 - pos];
+#else
+		b[pos] = in[pos];
+#endif
+	}
+}
+
+/*********************************************************//**
+Reads a ulint stored in the little-endian format, i.e. with the
+least significant byte at the lowest address.
+@return	unsigned long int */
+UNIV_INLINE
+ulint
+mach_read_from_n_little_endian(
+/*===========================*/
+	const byte*	buf,		/*!< in: from where to read */
+	ulint		buf_size)	/*!< in: from how many bytes to read */
+{
+	ulint		val;
+	const byte*	cur;
+
+	ut_ad(buf_size > 0);
+
+	val = 0;
+	cur = buf + buf_size;
+
+	/* Walk from the most significant (last) byte down to the
+	first one. The loop body runs before the termination test,
+	so buf_size == 0 is not supported. */
+	do {
+		cur--;
+
+		val <<= 8;
+		val += static_cast<ulint>(*cur);
+	} while (cur != buf);
+
+	return(val);
+}
+
+/*********************************************************//**
+Writes a ulint in the little-endian format, i.e. with the least
+significant byte at the lowest address. It is asserted (ut_ad)
+that the value fits in dest_size bytes. */
+UNIV_INLINE
+void
+mach_write_to_n_little_endian(
+/*==========================*/
+	byte*	dest,		/*!< in: where to write */
+	ulint	dest_size,	/*!< in: into how many bytes to write */
+	ulint	n)		/*!< in: unsigned long int to write */
+{
+	ulint	pos;
+
+	ut_ad(dest_size <= sizeof(ulint));
+	ut_ad(dest_size > 0);
+
+	/* Peel off the least significant byte on each round. The
+	test comes after the store, so at least one byte is always
+	written. */
+	pos = 0;
+
+	do {
+		dest[pos] = static_cast<byte>(n & 0xFF);
+		n >>= 8;
+		pos++;
+	} while (pos != dest_size);
+
+	/* Every significant bit of n must have been written out. */
+	ut_ad(n == 0);
+}
+
+/*********************************************************//**
+Reads a 2-byte ulint stored with the low-order byte first.
+@return	unsigned long int */
+UNIV_INLINE
+ulint
+mach_read_from_2_little_endian(
+/*===========================*/
+	const byte*	buf)		/*!< in: from where to read */
+{
+	return((static_cast<ulint>(buf[1]) << 8) | buf[0]);
+}
+
+/*********************************************************//**
+Writes a ulint that fits in 16 bits into 2 bytes in the
+little-endian format. */
+UNIV_INLINE
+void
+mach_write_to_2_little_endian(
+/*==========================*/
+	byte*	dest,		/*!< in: where to write */
+	ulint	n)		/*!< in: unsigned long int to write */
+{
+	ut_ad(n < 256 * 256);
+
+	/* Low-order byte first ... */
+	dest[0] = static_cast<byte>(n & 0xFFUL);
+
+	/* ... then the high-order byte. */
+	dest[1] = static_cast<byte>((n >> 8) & 0xFFUL);
+}
+
+/*********************************************************//**
+Convert integral type from storage byte order (big endian) to
+host byte order. For a signed type the stored sign bit is
+inverted, and the result is sign-extended to 64 bits.
+Note that src[0] is read even if len is 0.
+@return	integer value */
+UNIV_INLINE
+ib_uint64_t
+mach_read_int_type(
+/*===============*/
+	const byte*	src,		/*!< in: where to read from */
+	ulint		len,		/*!< in: length of src */
+	ibool		unsigned_type)	/*!< in: signed or unsigned flag */
+{
+	/* XXX this can be optimized on big-endian machines */
+
+	ullint	val;
+	uint	pos;
+
+	if (unsigned_type) {
+		/* Unsigned values are stored as is. */
+		val = src[0];
+	} else if (src[0] & 0x80) {
+		/* Signed values are stored with the sign bit inverted:
+		a set bit here means a non-negative number. */
+		val = src[0] ^ 0x80;
+	} else {
+		/* Negative number: restore the sign bit and
+		sign-extend it over the upper bytes. */
+		val = 0xFFFFFFFFFFFFFF00ULL | (src[0] ^ 0x80);
+	}
+
+	/* Append the remaining bytes, most significant first. */
+	for (pos = 1; pos < len; pos++) {
+		val <<= 8;
+		val |= src[pos];
+	}
+
+	return(val);
+}
+/*********************************************************//**
+Swap byte ordering: dest[len - 1 - i] = from[i] for 0 <= i < len. */
+UNIV_INLINE
+void
+mach_swap_byte_order(
+/*=================*/
+        byte*           dest,           /*!< out: where to write */
+        const byte*     from,           /*!< in: where to read from */
+        ulint           len)            /*!< in: number of bytes, 1..8 */
+{
+        ut_ad(len > 0);
+        ut_ad(len <= 8);
+
+        dest += len; /* start one past the last byte and fill backwards */
+
+        switch (len & 0x7) { /* len == 8 maps to case 0 */
+        case 0: *--dest = *from++; /* fall through */
+        case 7: *--dest = *from++; /* fall through */
+        case 6: *--dest = *from++; /* fall through */
+        case 5: *--dest = *from++; /* fall through */
+        case 4: *--dest = *from++; /* fall through */
+        case 3: *--dest = *from++; /* fall through */
+        case 2: *--dest = *from++; /* fall through */
+        case 1: *--dest = *from;
+        }
+}
+
+/*************************************************************//**
+Convert integral type from host byte order to (big-endian) storage
+byte order. */
+UNIV_INLINE
+void
+mach_write_int_type(
+/*================*/
+	byte*		dest,		/*!< in: where to write */
+	const byte*	src,		/*!< in: where to read from */
+	ulint		len,		/*!< in: length of src */
+	bool		usign)		/*!< in: signed or unsigned flag */
+{
+#ifndef WORDS_BIGENDIAN
+	mach_swap_byte_order(dest, src, len);
+#else
+	memcpy(dest, src, len);
+#endif /* !WORDS_BIGENDIAN */
+	/* Signed values are stored with the sign bit inverted. */
+	if (!usign) {
+		dest[0] ^= 0x80;
+	}
+}
+
+/*************************************************************//**
+Convert a ulonglong integer from host byte order to (big-endian)
+storage byte order. Only the len least significant bytes are written. */
+UNIV_INLINE
+void
+mach_write_ulonglong(
+/*=================*/
+	byte*		dest,		/*!< in: where to write */
+	ulonglong	src,		/*!< in: value to convert and write */
+	ulint		len,		/*!< in: length of dest */
+	bool		usign)		/*!< in: signed or unsigned flag */
+{
+	ut_ad(len <= sizeof(ulonglong));
+
+	const byte*	bytes = reinterpret_cast<const byte*>(&src);
+
+#ifndef WORDS_BIGENDIAN
+	mach_swap_byte_order(dest, bytes, len);
+#else
+	memcpy(dest, bytes + (sizeof(src) - len), len);
+#endif /* !WORDS_BIGENDIAN */
+
+	if (!usign) {
+		dest[0] ^= 0x80;
+	}
+}
+
+/********************************************************//**
+Reads a 1, 2 or 4 byte big-endian value, dispatching on type.
+@return	value read */
+UNIV_INLINE
+ulint
+mach_read_ulint(
+/*============*/
+	const byte*	ptr,	/*!< in: pointer from where to read */
+	ulint		type)	/*!< in: 1,2 or 4 bytes */
+{
+	if (type == 1) {
+		return(mach_read_from_1(ptr));
+	}
+	if (type == 2) {
+		return(mach_read_from_2(ptr));
+	}
+	if (type == 4) {
+		return(mach_read_from_4(ptr));
+	}
+	/* Unsupported type: raise ut_error. */
+	ut_error;
+	return(0);
+}
+
+#endif /* !UNIV_HOTBACKUP */
+#endif /* !UNIV_INNOCHECKSUM */
diff --git a/storage/innobase/include/mem0dbg.h b/storage/innobase/include/mem0dbg.h
new file mode 100644
index 00000000000..cc339b82910
--- /dev/null
+++ b/storage/innobase/include/mem0dbg.h
@@ -0,0 +1,150 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2010, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/mem0dbg.h
+The memory management: the debug code. This is not a compilation module,
+but is included in mem0mem.* !
+
+Created 6/9/1994 Heikki Tuuri
+*******************************************************/
+
+/* In the debug version each allocated field is surrounded with
+check fields whose sizes are given below */
+
+#ifdef UNIV_MEM_DEBUG
+# ifndef UNIV_HOTBACKUP
+/* The mutex which protects in the debug version the hash table
+containing the list of live memory heaps, and also the global
+variables in mem0dbg.cc. */
+extern ib_mutex_t	mem_hash_mutex;
+# endif /* !UNIV_HOTBACKUP */
+
+#define MEM_FIELD_HEADER_SIZE	ut_calc_align(2 * sizeof(ulint),\
+						UNIV_MEM_ALIGNMENT)
+#define MEM_FIELD_TRAILER_SIZE	sizeof(ulint)
+#else
+#define MEM_FIELD_HEADER_SIZE	0
+#endif
+
+
+/* Space needed when allocating a field of length N for a user.
+The space is allocated only in multiples of
+UNIV_MEM_ALIGNMENT. In the debug version there are also
+check fields at both ends of the field. */
+#ifdef UNIV_MEM_DEBUG
+#define MEM_SPACE_NEEDED(N) ut_calc_align((N) + MEM_FIELD_HEADER_SIZE\
+		 + MEM_FIELD_TRAILER_SIZE, UNIV_MEM_ALIGNMENT)
+#else
+#define MEM_SPACE_NEEDED(N) ut_calc_align((N), UNIV_MEM_ALIGNMENT)
+#endif
+
+#if defined UNIV_MEM_DEBUG || defined UNIV_DEBUG
+/***************************************************************//**
+Checks a memory heap for consistency and prints the contents if requested.
+Outputs the sum of sizes of buffers given to the user (only in
+the debug version), the physical size of the heap and the number of
+blocks in the heap. In case of error returns 0 as sizes and number
+of blocks. */
+UNIV_INTERN
+void
+mem_heap_validate_or_print(
+/*=======================*/
+	mem_heap_t*	heap,	/*!< in: memory heap */
+	byte*		top,	/*!< in: calculate and validate only until
+				this top pointer in the heap is reached,
+				if this pointer is NULL, ignored */
+	ibool		 print,	 /*!< in: if TRUE, prints the contents
+				of the heap; works only in
+				the debug version */
+	ibool*		 error,	 /*!< out: TRUE if error */
+	ulint*		us_size,/*!< out: allocated memory
+				(for the user) in the heap,
+				if a NULL pointer is passed as this
+				argument, it is ignored; in the
+				non-debug version this is always -1 */
+	ulint*		ph_size,/*!< out: physical size of the heap,
+				if a NULL pointer is passed as this
+				argument, it is ignored */
+	ulint*		n_blocks); /*!< out: number of blocks in the heap,
+				if a NULL pointer is passed as this
+				argument, it is ignored */
+/**************************************************************//**
+Validates the contents of a memory heap.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+mem_heap_validate(
+/*==============*/
+	mem_heap_t*   heap);	/*!< in: memory heap */
+#endif /* UNIV_MEM_DEBUG || UNIV_DEBUG */
+#ifdef UNIV_DEBUG
+/**************************************************************//**
+Checks that an object is a memory heap (or a block of it)
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+mem_heap_check(
+/*===========*/
+	mem_heap_t*   heap);	/*!< in: memory heap */
+#endif /* UNIV_DEBUG */
+#ifdef UNIV_MEM_DEBUG
+/*****************************************************************//**
+TRUE if no memory is currently allocated.
+@return	TRUE if no heaps exist */
+UNIV_INTERN
+ibool
+mem_all_freed(void);
+/*===============*/
+/*****************************************************************//**
+Validates the dynamic memory
+@return	TRUE if error */
+UNIV_INTERN
+ibool
+mem_validate_no_assert(void);
+/*=========================*/
+/************************************************************//**
+Validates the dynamic memory
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+mem_validate(void);
+/*===============*/
+#endif /* UNIV_MEM_DEBUG */
+/************************************************************//**
+Tries to find neighboring memory allocation blocks and dumps to stderr
+the neighborhood of a given pointer. */
+UNIV_INTERN
+void
+mem_analyze_corruption(
+/*===================*/
+	void*	ptr);	/*!< in: pointer to place of possible corruption */
+/*****************************************************************//**
+Prints information of dynamic memory usage and currently allocated memory
+heaps or buffers. Can only be used in the debug version. */
+UNIV_INTERN
+void
+mem_print_info(void);
+/*================*/
+/*****************************************************************//**
+Prints information of dynamic memory usage and currently allocated memory
+heaps or buffers since the last ..._print_info or..._print_new_info. */
+UNIV_INTERN
+void
+mem_print_new_info(void);
+/*====================*/
diff --git a/storage/innobase/include/mem0dbg.ic b/storage/innobase/include/mem0dbg.ic
new file mode 100644
index 00000000000..ec60ed35337
--- /dev/null
+++ b/storage/innobase/include/mem0dbg.ic
@@ -0,0 +1,109 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2010, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/mem0dbg.ic
+The memory management: the debug code. This is not an independent
+compilation module but is included in mem0mem.*.
+
+Created 6/8/1994 Heikki Tuuri
+*************************************************************************/
+
+#ifdef UNIV_MEM_DEBUG
+extern ulint	mem_current_allocated_memory;
+
+/******************************************************************//**
+Initializes an allocated memory field in the debug version. */
+UNIV_INTERN
+void
+mem_field_init(
+/*===========*/
+	byte*	buf,	/*!< in: memory field */
+	ulint	n);	/*!< in: how many bytes the user requested */
+/******************************************************************//**
+Erases an allocated memory field in the debug version. */
+UNIV_INTERN
+void
+mem_field_erase(
+/*============*/
+	byte*	buf,	/*!< in: memory field */
+	ulint	n);	/*!< in: how many bytes the user requested */
+/***************************************************************//**
+Initializes a buffer to a random combination of hex BA and BE.
+Used to initialize allocated memory. */
+UNIV_INTERN
+void
+mem_init_buf(
+/*=========*/
+	byte*	buf,	/*!< in: pointer to buffer */
+	ulint	 n);	 /*!< in: length of buffer */
+/***************************************************************//**
+Initializes a buffer to a random combination of hex DE and AD.
+Used to erase freed memory. */
+UNIV_INTERN
+void
+mem_erase_buf(
+/*==========*/
+	byte*	buf,	/*!< in: pointer to buffer */
+	ulint	n);	/*!< in: length of buffer */
+/***************************************************************//**
+Inserts a created memory heap to the hash table of
+current allocated memory heaps.
+Initializes the hash table when first called. */
+UNIV_INTERN
+void
+mem_hash_insert(
+/*============*/
+	mem_heap_t*	heap,	   /*!< in: the created heap */
+	const char*	file_name, /*!< in: file name of creation */
+	ulint		line);	   /*!< in: line where created */
+/***************************************************************//**
+Removes a memory heap (which is going to be freed by the caller)
+from the list of live memory heaps. The function is declared void:
+the size of the heap (how much memory in bytes was allocated for the
+user of the heap) is not returned to the caller.
+Also validates the heap.
+NOTE: This function does not free the storage occupied by the
+heap itself, only the node in the list of heaps. */
+UNIV_INTERN
+void
+mem_hash_remove(
+/*============*/
+	mem_heap_t*	heap,	   /*!< in: the heap to be freed */
+	const char*	file_name, /*!< in: file name of freeing */
+	ulint		line);	   /*!< in: line where freed */
+
+
+void
+mem_field_header_set_len(byte* field, ulint len);
+
+ulint
+mem_field_header_get_len(byte* field);
+
+void
+mem_field_header_set_check(byte* field, ulint check);
+
+ulint
+mem_field_header_get_check(byte* field);
+
+void
+mem_field_trailer_set_check(byte* field, ulint check);
+
+ulint
+mem_field_trailer_get_check(byte* field);
+#endif /* UNIV_MEM_DEBUG */
diff --git a/storage/innobase/include/mem0mem.h b/storage/innobase/include/mem0mem.h
new file mode 100644
index 00000000000..f30034f3074
--- /dev/null
+++ b/storage/innobase/include/mem0mem.h
@@ -0,0 +1,425 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2010, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/mem0mem.h
+The memory management
+
+Created 6/9/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef mem0mem_h
+#define mem0mem_h
+
+#include "univ.i"
+#include "ut0mem.h"
+#include "ut0byte.h"
+#include "ut0rnd.h"
+#ifndef UNIV_HOTBACKUP
+# include "sync0sync.h"
+#endif /* UNIV_HOTBACKUP */
+#include "ut0lst.h"
+#include "mach0data.h"
+
+/* -------------------- MEMORY HEAPS ----------------------------- */
+
+/* A block of a memory heap consists of the info structure
+followed by an area of memory */
+typedef struct mem_block_info_t	mem_block_t;
+
+/* A memory heap is a nonempty linear list of memory blocks */
+typedef mem_block_t		mem_heap_t;
+
+/* Types of allocation for memory heaps: DYNAMIC means allocation from the
+dynamic memory pool of the C compiler, BUFFER means allocation from the
+buffer pool; the latter method is used for very big heaps */
+
+#define MEM_HEAP_DYNAMIC	0	/* the most common type */
+#define MEM_HEAP_BUFFER		1
+#define MEM_HEAP_BTR_SEARCH	2	/* this flag can optionally be
+					ORed to MEM_HEAP_BUFFER, in which
+					case heap->free_block is used in
+					some cases for memory allocations,
+					and if it's NULL, the memory
+					allocation functions can return
+					NULL. */
+
+/* Different type of heaps in terms of which datastructure is using them */
+#define MEM_HEAP_FOR_BTR_SEARCH		(MEM_HEAP_BTR_SEARCH | MEM_HEAP_BUFFER)
+#define MEM_HEAP_FOR_PAGE_HASH		(MEM_HEAP_DYNAMIC)
+#define MEM_HEAP_FOR_RECV_SYS		(MEM_HEAP_BUFFER)
+#define MEM_HEAP_FOR_LOCK_HEAP		(MEM_HEAP_BUFFER)
+
+/* The following start size is used for the first block in the memory heap if
+the size is not specified, i.e., 0 is given as the parameter in the call of
+create. The standard size is the maximum (payload) size of the blocks used for
+allocations of small buffers. */
+
+#define MEM_BLOCK_START_SIZE		64
+#define MEM_BLOCK_STANDARD_SIZE		\
+	(UNIV_PAGE_SIZE >= 16384 ? 8000 : MEM_MAX_ALLOC_IN_BUF)
+
+/* If a memory heap is allowed to grow into the buffer pool, the following
+is the maximum size for a single allocated buffer: */
+#define MEM_MAX_ALLOC_IN_BUF		(UNIV_PAGE_SIZE - 200)
+
+/******************************************************************//**
+Initializes the memory system. */
+UNIV_INTERN
+void
+mem_init(
+/*=====*/
+	ulint	size);	/*!< in: common pool size in bytes */
+/******************************************************************//**
+Closes the memory system. */
+UNIV_INTERN
+void
+mem_close(void);
+/*===========*/
+
+#ifdef UNIV_DEBUG
+/**************************************************************//**
+Use this macro instead of the corresponding function! Macro for memory
+heap creation. */
+
+# define mem_heap_create(N)	mem_heap_create_func(		\
+		(N), __FILE__, __LINE__, MEM_HEAP_DYNAMIC)
+/**************************************************************//**
+Use this macro instead of the corresponding function! Macro for memory
+heap creation. */
+
+# define mem_heap_create_typed(N, T)	mem_heap_create_func(	\
+		(N), __FILE__, __LINE__, (T))
+
+#else /* UNIV_DEBUG */
+/**************************************************************//**
+Use this macro instead of the corresponding function! Macro for memory
+heap creation. */
+
+# define mem_heap_create(N)	mem_heap_create_func(		\
+		(N), MEM_HEAP_DYNAMIC)
+/**************************************************************//**
+Use this macro instead of the corresponding function! Macro for memory
+heap creation. */
+
+# define mem_heap_create_typed(N, T)	mem_heap_create_func(	\
+		(N), (T))
+
+#endif /* UNIV_DEBUG */
+/**************************************************************//**
+Use this macro instead of the corresponding function! Macro for memory
+heap freeing. */
+
+#define mem_heap_free(heap) mem_heap_free_func(\
+					  (heap), __FILE__, __LINE__)
+/*****************************************************************//**
+NOTE: Use the corresponding macros instead of this function. Creates a
+memory heap. For debugging purposes, takes also the file name and line as
+arguments.
+@return own: memory heap, NULL if did not succeed (only possible for
+MEM_HEAP_BTR_SEARCH type heaps) */
+UNIV_INLINE
+mem_heap_t*
+mem_heap_create_func(
+/*=================*/
+	ulint		n,		/*!< in: desired start block size,
+					this means that a single user buffer
+					of size n will fit in the block,
+					0 creates a default size block */
+#ifdef UNIV_DEBUG
+	const char*	file_name,	/*!< in: file name where created */
+	ulint		line,		/*!< in: line where created */
+#endif /* UNIV_DEBUG */
+	ulint		type);		/*!< in: heap type */
+/*****************************************************************//**
+NOTE: Use the corresponding macro instead of this function. Frees the space
+occupied by a memory heap. In the debug version erases the heap memory
+blocks. */
+UNIV_INLINE
+void
+mem_heap_free_func(
+/*===============*/
+	mem_heap_t*	heap,		/*!< in, own: heap to be freed */
+	const char*	file_name,	/*!< in: file name where freed */
+	ulint		line);		/*!< in: line where freed */
+/***************************************************************//**
+Allocates and zero-fills n bytes of memory from a memory heap.
+@return	allocated, zero-filled storage */
+UNIV_INLINE
+void*
+mem_heap_zalloc(
+/*============*/
+	mem_heap_t*	heap,	/*!< in: memory heap */
+	ulint		n);	/*!< in: number of bytes; if the heap is allowed
+				to grow into the buffer pool, this must be
+				<= MEM_MAX_ALLOC_IN_BUF */
+/***************************************************************//**
+Allocates n bytes of memory from a memory heap.
+@return allocated storage, NULL if did not succeed (only possible for
+MEM_HEAP_BTR_SEARCH type heaps) */
+UNIV_INLINE
+void*
+mem_heap_alloc(
+/*===========*/
+	mem_heap_t*	heap,	/*!< in: memory heap */
+	ulint		n);	/*!< in: number of bytes; if the heap is allowed
+				to grow into the buffer pool, this must be
+				<= MEM_MAX_ALLOC_IN_BUF */
+/*****************************************************************//**
+Returns a pointer to the heap top.
+@return	pointer to the heap top */
+UNIV_INLINE
+byte*
+mem_heap_get_heap_top(
+/*==================*/
+	mem_heap_t*	heap);	/*!< in: memory heap */
+/*****************************************************************//**
+Frees the space in a memory heap exceeding the pointer given. The
+pointer must have been acquired from mem_heap_get_heap_top. The first
+memory block of the heap is not freed. */
+UNIV_INLINE
+void
+mem_heap_free_heap_top(
+/*===================*/
+	mem_heap_t*	heap,	/*!< in: heap from which to free */
+	byte*		old_top);/*!< in: pointer to old top of heap */
+/*****************************************************************//**
+Empties a memory heap. The first memory block of the heap is not freed. */
+UNIV_INLINE
+void
+mem_heap_empty(
+/*===========*/
+	mem_heap_t*	heap);	/*!< in: heap to empty */
+/*****************************************************************//**
+Returns a pointer to the topmost element in a memory heap.
+The size of the element must be given.
+@return	pointer to the topmost element */
+UNIV_INLINE
+void*
+mem_heap_get_top(
+/*=============*/
+	mem_heap_t*	heap,	/*!< in: memory heap */
+	ulint		n);	/*!< in: size of the topmost element */
+/*****************************************************************//**
+Frees the topmost element in a memory heap.
+The size of the element must be given. */
+UNIV_INLINE
+void
+mem_heap_free_top(
+/*==============*/
+	mem_heap_t*	heap,	/*!< in: memory heap */
+	ulint		n);	/*!< in: size of the topmost element */
+/*****************************************************************//**
+Returns the space in bytes occupied by a memory heap. */
+UNIV_INLINE
+ulint
+mem_heap_get_size(
+/*==============*/
+	mem_heap_t*	heap);		/*!< in: heap */
+/**************************************************************//**
+Use these macros instead of the corresponding function! Macros for memory
+buffer allocation. NOTE: mem_zalloc() expands its argument N twice. */
+
+#define mem_zalloc(N)	memset(mem_alloc(N), 0, (N))
+
+#ifdef UNIV_DEBUG
+#define mem_alloc(N)	mem_alloc_func((N), __FILE__, __LINE__, NULL)
+#define mem_alloc2(N,S) mem_alloc_func((N), __FILE__, __LINE__, (S))
+#else /* UNIV_DEBUG */
+#define mem_alloc(N)	mem_alloc_func((N), NULL)
+#define mem_alloc2(N,S) mem_alloc_func((N), (S))
+#endif /* UNIV_DEBUG */
+
+/***************************************************************//**
+NOTE: Use the corresponding macro instead of this function.
+Allocates a single buffer of memory from the dynamic memory of
+the C compiler. Is like malloc of C. The buffer must be freed
+with mem_free.
+@return	own: free storage */
+UNIV_INLINE
+void*
+mem_alloc_func(
+/*===========*/
+	ulint		n,		/*!< in: requested size in bytes */
+#ifdef UNIV_DEBUG
+	const char*	file_name,	/*!< in: file name where created */
+	ulint		line,		/*!< in: line where created */
+#endif /* UNIV_DEBUG */
+	ulint*		size);		/*!< out: allocated size in bytes,
+					or NULL */
+
+/**************************************************************//**
+Use this macro instead of the corresponding function!
+Macro for memory buffer freeing */
+
+#define mem_free(PTR)	mem_free_func((PTR), __FILE__, __LINE__)
+/***************************************************************//**
+NOTE: Use the corresponding macro instead of this function.
+Frees a single buffer of storage from
+the dynamic memory of C compiler. Similar to free of C. */
+UNIV_INLINE
+void
+mem_free_func(
+/*==========*/
+	void*		ptr,		/*!< in, own: buffer to be freed */
+	const char*	file_name,	/*!< in: file name where freed */
+	ulint		line);		/*!< in: line where freed */
+
+/**********************************************************************//**
+Duplicates a NUL-terminated string.
+@return	own: a copy of the string, must be deallocated with mem_free */
+UNIV_INLINE
+char*
+mem_strdup(
+/*=======*/
+	const char*	str);	/*!< in: string to be copied */
+/**********************************************************************//**
+Makes a NUL-terminated copy of a nonterminated string.
+@return	own: a copy of the string, must be deallocated with mem_free */
+UNIV_INLINE
+char*
+mem_strdupl(
+/*========*/
+	const char*	str,	/*!< in: string to be copied */
+	ulint		len);	/*!< in: length of str, in bytes */
+
+/**********************************************************************//**
+Duplicates a NUL-terminated string, allocated from a memory heap.
+@return	own: a copy of the string */
+UNIV_INTERN
+char*
+mem_heap_strdup(
+/*============*/
+	mem_heap_t*	heap,	/*!< in: memory heap where string is allocated */
+	const char*	str);	/*!< in: string to be copied */
+/**********************************************************************//**
+Makes a NUL-terminated copy of a nonterminated string,
+allocated from a memory heap.
+@return	own: a copy of the string */
+UNIV_INLINE
+char*
+mem_heap_strdupl(
+/*=============*/
+	mem_heap_t*	heap,	/*!< in: memory heap where string is allocated */
+	const char*	str,	/*!< in: string to be copied */
+	ulint		len);	/*!< in: length of str, in bytes */
+
+/**********************************************************************//**
+Concatenate two strings and return the result, using a memory heap.
+@return	own: the result */
+UNIV_INTERN
+char*
+mem_heap_strcat(
+/*============*/
+	mem_heap_t*	heap,	/*!< in: memory heap where string is allocated */
+	const char*	s1,	/*!< in: string 1 */
+	const char*	s2);	/*!< in: string 2 */
+
+/**********************************************************************//**
+Duplicate a block of data, allocated from a memory heap.
+@return	own: a copy of the data */
+UNIV_INTERN
+void*
+mem_heap_dup(
+/*=========*/
+	mem_heap_t*	heap,	/*!< in: memory heap where copy is allocated */
+	const void*	data,	/*!< in: data to be copied */
+	ulint		len);	/*!< in: length of data, in bytes */
+
+/****************************************************************//**
+A simple sprintf replacement that dynamically allocates the space for the
+formatted string from the given heap. This supports a very limited set of
+the printf syntax: types 's' and 'u' and length modifier 'l' (which is
+required for the 'u' type).
+@return	heap-allocated formatted string */
+UNIV_INTERN
+char*
+mem_heap_printf(
+/*============*/
+	mem_heap_t*	heap,	/*!< in: memory heap */
+	const char*	format,	/*!< in: format string */
+	...) __attribute__ ((format (printf, 2, 3)));
+
+#ifdef MEM_PERIODIC_CHECK
+/******************************************************************//**
+Goes through the list of all allocated mem blocks, checks their magic
+numbers, and reports possible corruption. */
+UNIV_INTERN
+void
+mem_validate_all_blocks(void);
+/*=========================*/
+#endif
+
+/*#######################################################################*/
+
+/** The info structure stored at the beginning of a heap block */
+struct mem_block_info_t {
+	ulint	magic_n;/* magic number for debugging */
+#ifdef UNIV_DEBUG
+	char	file_name[8];/* file name where the mem heap was created */
+	ulint	line;	/*!< line number where the mem heap was created */
+#endif /* UNIV_DEBUG */
+	UT_LIST_BASE_NODE_T(mem_block_t) base; /* In the first block in
+			the list this is the base node of the list of blocks;
+			in subsequent blocks this is undefined */
+	UT_LIST_NODE_T(mem_block_t) list; /* This contains pointers to next
+			and prev in the list. The first block allocated
+			to the heap is also the first block in this list,
+			though it also contains the base node of the list. */
+	ulint	len;	/*!< physical length of this block in bytes */
+	ulint	total_size; /*!< physical length in bytes of all blocks
+			in the heap. This is defined only in the base
+			node and is set to ULINT_UNDEFINED in others. */
+	ulint	type;	/*!< type of heap: MEM_HEAP_DYNAMIC, or
+			MEM_HEAP_BUFFER possibly ORed to MEM_HEAP_BTR_SEARCH */
+	ulint	free;	/*!< offset in bytes of the first free position for
+			user data in the block */
+	ulint	start;	/*!< the value of the struct field 'free' at the
+			creation of the block */
+#ifndef UNIV_HOTBACKUP
+	void*	free_block;
+			/* if the MEM_HEAP_BTR_SEARCH bit is set in type,
+			and this is the heap root, this can contain an
+			allocated buffer frame, which can be appended as a
+			free block to the heap, if we need more space;
+			otherwise, this is NULL */
+	void*	buf_block;
+			/* if this block has been allocated from the buffer
+			pool, this contains the buf_block_t handle;
+			otherwise, this is NULL */
+#endif /* !UNIV_HOTBACKUP */
+#ifdef MEM_PERIODIC_CHECK
+	UT_LIST_NODE_T(mem_block_t) mem_block_list;
+			/* List of all mem blocks allocated; protected
+			by the mem_comm_pool mutex */
+#endif
+};
+
+#define MEM_BLOCK_MAGIC_N	764741555
+#define MEM_FREED_BLOCK_MAGIC_N	547711122
+
+/* Header size for a memory heap block */
+#define MEM_BLOCK_HEADER_SIZE	ut_calc_align(sizeof(mem_block_info_t),\
+							UNIV_MEM_ALIGNMENT)
+#include "mem0dbg.h"
+
+#ifndef UNIV_NONINL
+#include "mem0mem.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/mem0mem.ic b/storage/innobase/include/mem0mem.ic
new file mode 100644
index 00000000000..0d983d69e1a
--- /dev/null
+++ b/storage/innobase/include/mem0mem.ic
@@ -0,0 +1,649 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2010, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/mem0mem.ic
+The memory management
+
+Created 6/8/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "mem0dbg.ic"
+#ifndef UNIV_HOTBACKUP
+# include "mem0pool.h"
+#endif /* !UNIV_HOTBACKUP */
+
+#ifdef UNIV_DEBUG
+# define mem_heap_create_block(heap, n, type, file_name, line)		\
+	mem_heap_create_block_func(heap, n, file_name, line, type)
+# define mem_heap_create_at(N, file_name, line)				\
+	mem_heap_create_func(N, file_name, line, MEM_HEAP_DYNAMIC)
+#else /* UNIV_DEBUG */
+# define mem_heap_create_block(heap, n, type, file_name, line)		\
+	mem_heap_create_block_func(heap, n, type)
+# define mem_heap_create_at(N, file_name, line)				\
+	mem_heap_create_func(N, MEM_HEAP_DYNAMIC)
+#endif /* UNIV_DEBUG */
+/***************************************************************//**
+Creates a memory heap block where data can be allocated.
+@return own: memory heap block, NULL if did not succeed (only possible
+for MEM_HEAP_BTR_SEARCH type heaps) */
+UNIV_INTERN
+mem_block_t*
+mem_heap_create_block_func(
+/*=======================*/
+	mem_heap_t*	heap,	/*!< in: memory heap or NULL if first block
+				should be created */
+	ulint		n,	/*!< in: number of bytes needed for user data */
+#ifdef UNIV_DEBUG
+	const char*	file_name,/*!< in: file name where created */
+	ulint		line,	/*!< in: line where created */
+#endif /* UNIV_DEBUG */
+	ulint		type);	/*!< in: type of heap: MEM_HEAP_DYNAMIC or
+				MEM_HEAP_BUFFER */
+/******************************************************************//**
+Frees a block from a memory heap. */
+UNIV_INTERN
+void
+mem_heap_block_free(
+/*================*/
+	mem_heap_t*	heap,	/*!< in: heap */
+	mem_block_t*	block);	/*!< in: block to free */
+#ifndef UNIV_HOTBACKUP
+/******************************************************************//**
+Frees the free_block field from a memory heap. */
+UNIV_INTERN
+void
+mem_heap_free_block_free(
+/*=====================*/
+	mem_heap_t*	heap);	/*!< in: heap */
+#endif /* !UNIV_HOTBACKUP */
+/***************************************************************//**
+Adds a new block to a memory heap.
+@return created block, NULL if did not succeed (only possible for
+MEM_HEAP_BTR_SEARCH type heaps) */
+UNIV_INTERN
+mem_block_t*
+mem_heap_add_block(
+/*===============*/
+	mem_heap_t*	heap,	/*!< in: memory heap */
+	ulint		n);	/*!< in: number of bytes user needs */
+
+/** Stores the physical length, in bytes, of a block (asserted nonzero). */
+UNIV_INLINE
+void
+mem_block_set_len(mem_block_t* block, ulint len)
+{
+	ut_ad(0 < len);
+	block->len = len;
+}
+
+/** @return the physical length of a block in bytes */
+UNIV_INLINE ulint
+mem_block_get_len(mem_block_t* block)
+{
+	return block->len;
+}
+
+/** Stores the heap type of a block; three type values pass the assertion. */
+UNIV_INLINE
+void
+mem_block_set_type(mem_block_t* block, ulint type)
+{
+	ut_ad(type == MEM_HEAP_DYNAMIC || type == MEM_HEAP_BUFFER
+	      || type == (MEM_HEAP_BUFFER | MEM_HEAP_BTR_SEARCH));
+	block->type = type;
+}
+
+/** @return the heap type stored in a block */
+UNIV_INLINE ulint
+mem_block_get_type(mem_block_t* block)
+{
+	return block->type;
+}
+
+/** Stores the offset of the first free position; asserted to be in (0, len]. */
+UNIV_INLINE
+void
+mem_block_set_free(mem_block_t* block, ulint free)
+{
+	ut_ad(0 < free);
+	ut_ad(mem_block_get_len(block) >= free);
+	block->free = free;
+}
+
+/** @return offset in bytes of the first free position in a block */
+UNIV_INLINE ulint
+mem_block_get_free(mem_block_t* block)
+{
+	return block->free;
+}
+
+/** Stores the initial value of 'free' for a block (asserted nonzero). */
+UNIV_INLINE
+void
+mem_block_set_start(mem_block_t* block, ulint start)
+{
+	ut_ad(0 < start);
+	block->start = start;
+}
+
+/** @return the value that 'free' had when the block was created */
+UNIV_INLINE ulint
+mem_block_get_start(mem_block_t* block)
+{
+	return block->start;
+}
+
+/***************************************************************//**
+Hands out n bytes from a memory heap with every byte cleared to zero.
+@return	allocated, zero-filled storage */
+UNIV_INLINE
+void*
+mem_heap_zalloc(
+	mem_heap_t*	heap,	/*!< in: memory heap */
+	ulint		n)	/*!< in: number of bytes; <= MEM_MAX_ALLOC_IN_BUF
+				if the heap may grow into the buffer pool */
+{
+	void*	zeroed;
+	ut_ad(heap);
+	ut_ad(!(heap->type & MEM_HEAP_BTR_SEARCH));
+	zeroed = mem_heap_alloc(heap, n);
+	return(memset(zeroed, 0, n));
+}
+
+/***************************************************************//**
+Allocates n bytes of memory from a memory heap: from its last block or, if
+that block has under MEM_SPACE_NEEDED(n) bytes left, from a newly added one.
+@return allocated storage, NULL if did not succeed (only possible for
+MEM_HEAP_BTR_SEARCH type heaps) */
+UNIV_INLINE
+void*
+mem_heap_alloc(
+/*===========*/
+	mem_heap_t*	heap,	/*!< in: memory heap */
+	ulint		n)	/*!< in: number of bytes; if the heap is allowed
+				to grow into the buffer pool, this must be
+				<= MEM_MAX_ALLOC_IN_BUF */
+{
+	mem_block_t*	block;
+	void*		buf;
+	ulint		free;
+
+	ut_ad(mem_heap_check(heap));
+
+	block = UT_LIST_GET_LAST(heap->base);
+
+	ut_ad(!(block->type & MEM_HEAP_BUFFER) || (n <= MEM_MAX_ALLOC_IN_BUF));
+
+	/* Check if there is enough space in the last block. If not, add a
+	new block to the heap */
+
+	if (mem_block_get_len(block)
+	    < mem_block_get_free(block) + MEM_SPACE_NEEDED(n)) {
+
+		block = mem_heap_add_block(heap, n);
+
+		if (block == NULL) {
+
+			return(NULL);
+		}
+	}
+
+	free = mem_block_get_free(block);
+
+	buf = (byte*) block + free;
+
+	mem_block_set_free(block, free + MEM_SPACE_NEEDED(n));
+
+#ifdef UNIV_MEM_DEBUG
+	UNIV_MEM_ALLOC(buf,
+		       n + MEM_FIELD_HEADER_SIZE + MEM_FIELD_TRAILER_SIZE);
+
+	/* In the debug version write debugging info to the field */
+	mem_field_init((byte*) buf, n);
+
+	/* Skip the debug field header: the caller gets what follows it */
+	buf = (byte*) buf + MEM_FIELD_HEADER_SIZE;
+
+#endif
+	UNIV_MEM_ALLOC(buf, n);
+	return(buf);
+}
+
+/*****************************************************************//**
+Computes the heap top, i.e. the first free position in the last block
+of the heap.
+@return	pointer to the heap top */
+UNIV_INLINE
+byte*
+mem_heap_get_heap_top(
+/*==================*/
+	mem_heap_t*	heap)	/*!< in: memory heap */
+{
+	mem_block_t*	last;
+
+	ut_ad(mem_heap_check(heap));
+
+	/* The top lies 'free' bytes past the start of the last block. */
+	last = UT_LIST_GET_LAST(heap->base);
+
+	return((byte*) last + mem_block_get_free(last));
+}
+
+/*****************************************************************//**
+Frees the space in a memory heap above the pointer given, which must have
+been acquired from mem_heap_get_heap_top. Whole blocks are freed from the
+end of the block list; the first memory block of the heap is not freed. */
+UNIV_INLINE
+void
+mem_heap_free_heap_top(
+/*===================*/
+	mem_heap_t*	heap,	/*!< in: heap from which to free */
+	byte*		old_top)/*!< in: pointer to old top of heap */
+{
+	mem_block_t*	block;
+	mem_block_t*	prev_block;
+#if defined UNIV_MEM_DEBUG || defined UNIV_DEBUG
+	ibool		error;
+	ulint		total_size;
+	ulint		size;
+
+	ut_ad(mem_heap_check(heap));
+
+	/* Validate the heap and get its total allocated size */
+	mem_heap_validate_or_print(heap, NULL, FALSE, &error, &total_size,
+				   NULL, NULL);
+	ut_a(!error);
+
+	/* Get the size below the old_top pointer */
+	mem_heap_validate_or_print(heap, old_top, FALSE, &error, &size, NULL,
+				   NULL);
+	ut_a(!error);
+
+#endif
+
+	block = UT_LIST_GET_LAST(heap->base);
+
+	while (block != NULL) {
+		if (((byte*) block + mem_block_get_free(block) >= old_top)
+		    && ((byte*) block <= old_top)) {
+			/* Found the block that contains old_top */
+
+			break;
+		}
+
+		/* Store prev_block value before freeing the current block
+		(the current block will be erased in freeing) */
+
+		prev_block = UT_LIST_GET_PREV(list, block);
+
+		mem_heap_block_free(heap, block);
+
+		block = prev_block;
+	}
+
+	ut_ad(block);
+
+	/* Make old_top the new first free position of the block */
+	mem_block_set_free(block, old_top - (byte*) block);
+
+	ut_ad(mem_block_get_start(block) <= mem_block_get_free(block));
+	UNIV_MEM_ASSERT_W(old_top, (byte*) block + block->len - old_top);
+#if defined UNIV_MEM_DEBUG
+	/* In the debug version erase block from top up */
+	mem_erase_buf(old_top, (byte*) block + block->len - old_top);
+
+	/* Update allocated memory count */
+	mutex_enter(&mem_hash_mutex);
+	mem_current_allocated_memory -= (total_size - size);
+	mutex_exit(&mem_hash_mutex);
+#endif /* UNIV_MEM_DEBUG */
+	UNIV_MEM_ALLOC(old_top, (byte*) block + block->len - old_top);
+
+	/* If free == start, we may free the block if it is not the first
+	one */
+
+	if ((heap != block) && (mem_block_get_free(block)
+				== mem_block_get_start(block))) {
+		mem_heap_block_free(heap, block);
+	}
+}
+
+/*****************************************************************//**
+Empties a memory heap. The first block is kept; a set free_block is freed. */
+UNIV_INLINE
+void
+mem_heap_empty(
+	mem_heap_t*	heap)	/*!< in: heap to empty */
+{
+	byte*	first_top = (byte*) heap + mem_block_get_start(heap);
+	mem_heap_free_heap_top(heap, first_top);
+#ifndef UNIV_HOTBACKUP
+	if (heap->free_block != NULL) {
+		mem_heap_free_block_free(heap);
+	}
+#endif /* !UNIV_HOTBACKUP */
+}
+
+/*****************************************************************//**
+Locates the topmost element of a memory heap, i.e. the element that ends
+at the heap top. The caller must supply the size of that element.
+@return	pointer to the topmost element */
+UNIV_INLINE
+void*
+mem_heap_get_top(
+/*=============*/
+	mem_heap_t*	heap,	/*!< in: memory heap */
+	ulint		n)	/*!< in: size of the topmost element */
+{
+	mem_block_t*	last;
+	byte*		elem;
+	ulint		top_offset;
+
+	ut_ad(mem_heap_check(heap));
+
+	last = UT_LIST_GET_LAST(heap->base);
+	top_offset = mem_block_get_free(last);
+	/* Step back from the top over the space the element occupies. */
+	elem = (byte*) last + top_offset - MEM_SPACE_NEEDED(n);
+#ifdef UNIV_MEM_DEBUG
+	ut_ad(mem_block_get_start(last) <= (ulint) (elem - (byte*) last));
+
+	/* Skip the debug field header so that elem points at the storage
+	that the allocation handed to the caller. */
+	elem += MEM_FIELD_HEADER_SIZE;
+
+	/* The length recorded in the field header must agree with n. */
+	ut_ad(n == mem_field_header_get_len(elem));
+#endif
+
+	return((void*) elem);
+}
+
+/*****************************************************************//**
+Frees the topmost element in a memory heap, i.e. the element that ends at
+the heap top. The size of the element must be given. */
+UNIV_INLINE
+void
+mem_heap_free_top(
+/*==============*/
+	mem_heap_t*	heap,	/*!< in: memory heap */
+	ulint		n)	/*!< in: size of the topmost element */
+{
+	mem_block_t*	block;
+
+	ut_ad(mem_heap_check(heap));
+
+	block = UT_LIST_GET_LAST(heap->base);
+
+	/* Move the free offset of the block back over the element */
+	mem_block_set_free(block, mem_block_get_free(block)
+			   - MEM_SPACE_NEEDED(n));
+	UNIV_MEM_ASSERT_W((byte*) block + mem_block_get_free(block), n);
+#ifdef UNIV_MEM_DEBUG
+
+	ut_ad(mem_block_get_start(block) <= mem_block_get_free(block));
+
+	/* In the debug version check the consistency, and erase field */
+	mem_field_erase((byte*) block + mem_block_get_free(block), n);
+#endif
+
+	/* If the block is now empty (free == start) and it is not the first
+	block of the heap, free it */
+
+	if ((heap != block) && (mem_block_get_free(block)
+				== mem_block_get_start(block))) {
+		mem_heap_block_free(heap, block);
+	} else {
+		/* Avoid a bogus UNIV_MEM_ASSERT_W() warning in a
+		subsequent invocation of mem_heap_free_top().
+		Originally, this was UNIV_MEM_FREE(), to catch writes
+		to freed memory. */
+		UNIV_MEM_ALLOC((byte*) block + mem_block_get_free(block), n);
+	}
+}
+
+/*****************************************************************//**
+NOTE: Use the corresponding macros instead of this function. Creates a
+memory heap; the first block of the heap doubles as the heap handle that
+is returned. For debugging purposes, the file name and line of the
+creation are taken as arguments in debug builds.
+@return own: memory heap, NULL if did not succeed (only possible for
+MEM_HEAP_BTR_SEARCH type heaps) */
+UNIV_INLINE
+mem_heap_t*
+mem_heap_create_func(
+/*=================*/
+	ulint		n,		/*!< in: desired start block size,
+					this means that a single user buffer
+					of size n will fit in the block,
+					0 creates a default size block */
+#ifdef UNIV_DEBUG
+	const char*	file_name,	/*!< in: file name where created */
+	ulint		line,		/*!< in: line where created */
+#endif /* UNIV_DEBUG */
+	ulint		type)		/*!< in: heap type */
+{
+	mem_block_t*	first;
+
+	/* A request for size 0 selects the default start block size */
+	if (n == 0) {
+		n = MEM_BLOCK_START_SIZE;
+	}
+
+	first = mem_heap_create_block(NULL, n, type, file_name, line);
+
+	if (first == NULL) {
+		/* The block could not be created: report the failure */
+		return(NULL);
+	}
+
+	/* The first block carries the base node of the block list and is
+	itself the first member of that list. */
+	UT_LIST_INIT(first->base);
+	UT_LIST_ADD_FIRST(list, first->base, first);
+
+#ifdef UNIV_MEM_DEBUG
+
+	/* Register the new heap together with its creation site */
+	mem_hash_insert(first, file_name, line);
+
+#endif
+
+	return(first);
+}
+
+/*****************************************************************//**
+NOTE: Use the corresponding macro instead of this function. Releases all
+the memory blocks of a heap, walking the block list from the last block
+to the first. In the debug version erases the heap memory blocks. */
+UNIV_INLINE
+void
+mem_heap_free_func(
+/*===============*/
+	mem_heap_t*	heap,		/*!< in, own: heap to be freed */
+	const char*	file_name __attribute__((unused)),
+					/*!< in: file name where freed */
+	ulint		line  __attribute__((unused)))
+					/*!< in: line where freed */
+{
+	mem_block_t*	cur;
+	mem_block_t*	older;
+
+	ut_ad(mem_heap_check(heap));
+
+	cur = UT_LIST_GET_LAST(heap->base);
+
+#ifdef UNIV_MEM_DEBUG
+
+	/* The debug version removes the heap from the hash table of
+	heaps and checks its consistency. */
+
+	mem_hash_remove(heap, file_name, line);
+
+#endif
+#ifndef UNIV_HOTBACKUP
+	if (heap->free_block) {
+		mem_heap_free_block_free(heap);
+	}
+#endif /* !UNIV_HOTBACKUP */
+
+	/* Look up the predecessor before freeing a block, because the
+	contents of the block are erased when it is freed. */
+	for (; cur != NULL; cur = older) {
+
+		older = UT_LIST_GET_PREV(list, cur);
+
+		mem_heap_block_free(heap, cur);
+	}
+}
+
+/***************************************************************//**
+NOTE: Use the corresponding macro instead of this function.
+Allocates a single memory buffer, much like malloc of C, by creating
+a private memory heap and taking the buffer from its first block.
+The buffer must be freed with mem_free.
+@return	own: free storage */
+UNIV_INLINE
+void*
+mem_alloc_func(
+/*===========*/
+	ulint		n,		/*!< in: desired number of bytes */
+#ifdef UNIV_DEBUG
+	const char*	file_name,	/*!< in: file name where created */
+	ulint		line,		/*!< in: line where created */
+#endif /* UNIV_DEBUG */
+	ulint*		size)		/*!< out: allocated size in bytes,
+					or NULL */
+{
+	mem_heap_t*	heap;
+	void*		buf;
+
+	heap = mem_heap_create_at(n, file_name, line);
+
+	/* The first block of the heap was created big enough for the
+	requested buffer, so the buffer ends up in the first block. This
+	lets the heap pointer be computed back from the buffer pointer
+	when the buffer is freed. */
+
+	if (size != NULL) {
+		/* The caller wants to know the real size: hand out all
+		the space that is left in the block. */
+		ulint	usable = mem_block_get_len(heap)
+			- mem_block_get_free(heap);
+#ifdef UNIV_MEM_DEBUG
+		usable -= MEM_FIELD_HEADER_SIZE + MEM_FIELD_TRAILER_SIZE;
+#endif /* UNIV_MEM_DEBUG */
+		ut_ad(usable >= n);
+		*size = usable;
+		n = usable;
+	}
+
+	buf = mem_heap_alloc(heap, n);
+
+	/* The buffer must start right after the block header and the
+	field header of the first block. */
+	ut_a((byte*) heap == (byte*) buf - MEM_BLOCK_HEADER_SIZE
+	     - MEM_FIELD_HEADER_SIZE);
+	return(buf);
+}
+
+/***************************************************************//**
+NOTE: Use the corresponding macro instead of this function. Frees a single
+buffer of storage, similar to the free of C. The heap that owns the buffer
+is located by stepping back over the block header and the field header
+that precede the buffer, and that heap is then freed. */
+UNIV_INLINE
+void
+mem_free_func(
+/*==========*/
+	void*		ptr,		/*!< in, own: buffer to be freed */
+	const char*	file_name,	/*!< in: file name, passed on to
+					mem_heap_free_func() */
+	ulint		line)		/*!< in: line, passed on to
+					mem_heap_free_func() */
+{
+	byte*	block_start;
+
+	block_start = (byte*) ptr - MEM_BLOCK_HEADER_SIZE
+		- MEM_FIELD_HEADER_SIZE;
+
+	mem_heap_free_func((mem_heap_t*) block_start, file_name, line);
+}
+
+/*****************************************************************//**
+Computes the space in bytes occupied by a memory heap: the total size
+recorded in the heap, plus UNIV_PAGE_SIZE when the heap has a free block
+attached (not in UNIV_HOTBACKUP builds).
+@return	size of the heap in bytes */
+UNIV_INLINE
+ulint
+mem_heap_get_size(
+/*==============*/
+	mem_heap_t*	heap)	/*!< in: heap */
+{
+	ulint	total;
+
+	ut_ad(mem_heap_check(heap));
+
+	total = heap->total_size;
+
+#ifndef UNIV_HOTBACKUP
+	/* An attached free block is counted as one page */
+	if (heap->free_block) {
+		total += UNIV_PAGE_SIZE;
+	}
+#endif /* !UNIV_HOTBACKUP */
+
+	return(total);
+}
+
+/**********************************************************************//**
+Copies a NUL-terminated string, terminator included, into a buffer
+obtained with mem_alloc.
+@return	own: a copy of the string, must be deallocated with mem_free */
+UNIV_INLINE
+char*
+mem_strdup(
+/*=======*/
+	const char*	str)	/*!< in: string to be copied */
+{
+	/* Size of the copy, counting the terminating NUL */
+	ulint	size = strlen(str) + 1;
+	char*	copy = (char*) mem_alloc(size);
+
+	memcpy(copy, str, size);
+
+	return(copy);
+}
+
+/**********************************************************************//**
+Copies len bytes of a string that need not be NUL-terminated into a buffer
+obtained with mem_alloc, and appends a terminating NUL.
+@return	own: a copy of the string, must be deallocated with mem_free */
+UNIV_INLINE
+char*
+mem_strdupl(
+/*========*/
+	const char*	str,	/*!< in: string to be copied */
+	ulint		len)	/*!< in: length of str, in bytes */
+{
+	char*	copy = (char*) mem_alloc(len + 1);
+
+	memcpy(copy, str, len);
+
+	/* The extra byte allocated above holds the terminator */
+	copy[len] = 0;
+
+	return(copy);
+}
+
+/**********************************************************************//**
+Copies len bytes of a string that need not be NUL-terminated into storage
+allocated from a memory heap, and appends a terminating NUL.
+@return	own: a copy of the string */
+UNIV_INLINE
+char*
+mem_heap_strdupl(
+/*=============*/
+	mem_heap_t*	heap,	/*!< in: memory heap where string is allocated */
+	const char*	str,	/*!< in: string to be copied */
+	ulint		len)	/*!< in: length of str, in bytes */
+{
+	char*	copy = (char*) mem_heap_alloc(heap, len + 1);
+
+	memcpy(copy, str, len);
+
+	/* The extra byte allocated above holds the terminator */
+	copy[len] = 0;
+
+	return(copy);
+}
diff --git a/storage/innobase/include/mem0pool.h b/storage/innobase/include/mem0pool.h
new file mode 100644
index 00000000000..a65ba50fdf9
--- /dev/null
+++ b/storage/innobase/include/mem0pool.h
@@ -0,0 +1,121 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/mem0pool.h
+The lowest-level memory management
+
+Created 6/9/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef mem0pool_h
+#define mem0pool_h
+
+#include "univ.i"
+#include "os0file.h"
+#include "ut0lst.h"
+
+/** Memory pool */
+struct mem_pool_t;
+
+/** The common memory pool */
+extern mem_pool_t*	mem_comm_pool;
+
+/** Memory area header */
+struct mem_area_t{
+	ulint		size_and_free;	/*!< the memory area size is obtained
+					by ANDing with ~MEM_AREA_FREE; the
+					area is in a free list if ANDing with
+					MEM_AREA_FREE results in nonzero */
+	UT_LIST_NODE_T(mem_area_t)
+			free_list;	/*!< free list node */
+};
+
+/** Each memory area takes this many extra bytes for control information */
+#define MEM_AREA_EXTRA_SIZE	(ut_calc_align(sizeof(struct mem_area_t),\
+			UNIV_MEM_ALIGNMENT))
+
+/********************************************************************//**
+Creates a memory pool.
+@return	memory pool */
+UNIV_INTERN
+mem_pool_t*
+mem_pool_create(
+/*============*/
+	ulint	size);	/*!< in: pool size in bytes */
+/********************************************************************//**
+Frees a memory pool. */
+UNIV_INTERN
+void
+mem_pool_free(
+/*==========*/
+	mem_pool_t*	pool);	/*!< in, own: memory pool */
+/********************************************************************//**
+Allocates memory from a pool. NOTE: This low-level function should only be
+used in mem0mem.*!
+@return	own: allocated memory buffer */
+UNIV_INTERN
+void*
+mem_area_alloc(
+/*===========*/
+	ulint*		psize,	/*!< in: requested size in bytes; for optimum
+				space usage, the size should be a power of 2
+				minus MEM_AREA_EXTRA_SIZE;
+				out: allocated size in bytes (greater than
+				or equal to the requested size) */
+	mem_pool_t*	pool);	/*!< in: memory pool */
+/********************************************************************//**
+Frees memory to a pool. */
+UNIV_INTERN
+void
+mem_area_free(
+/*==========*/
+	void*		ptr,	/*!< in, own: pointer to allocated memory
+				buffer */
+	mem_pool_t*	pool);	/*!< in: memory pool */
+/********************************************************************//**
+Returns the amount of reserved memory.
+@return	reserved memory in bytes */
+UNIV_INTERN
+ulint
+mem_pool_get_reserved(
+/*==================*/
+	mem_pool_t*	pool);	/*!< in: memory pool */
+/********************************************************************//**
+Validates a memory pool.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+mem_pool_validate(
+/*==============*/
+	mem_pool_t*	pool);	/*!< in: memory pool */
+/********************************************************************//**
+Prints info of a memory pool. */
+UNIV_INTERN
+void
+mem_pool_print_info(
+/*================*/
+	FILE*		outfile,/*!< in: output file to write to */
+	mem_pool_t*	pool);	/*!< in: memory pool */
+
+
+#ifndef UNIV_NONINL
+#include "mem0pool.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/mem0pool.ic b/storage/innobase/include/mem0pool.ic
new file mode 100644
index 00000000000..f4bafb8ba63
--- /dev/null
+++ b/storage/innobase/include/mem0pool.ic
@@ -0,0 +1,24 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/mem0pool.ic
+The lowest-level memory management
+
+Created 6/8/1994 Heikki Tuuri
+*************************************************************************/
diff --git a/storage/innobase/include/mtr0log.h b/storage/innobase/include/mtr0log.h
new file mode 100644
index 00000000000..18a345d050f
--- /dev/null
+++ b/storage/innobase/include/mtr0log.h
@@ -0,0 +1,251 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/mtr0log.h
+Mini-transaction logging routines
+
+Created 12/7/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef mtr0log_h
+#define mtr0log_h
+
+#include "univ.i"
+#include "mtr0mtr.h"
+#include "dict0types.h"
+
+#ifndef UNIV_HOTBACKUP
+/********************************************************//**
+Writes 1, 2 or 4 bytes to a file page. Writes the corresponding log
+record to the mini-transaction log if mtr is not NULL. */
+UNIV_INTERN
+void
+mlog_write_ulint(
+/*=============*/
+	byte*	ptr,	/*!< in: pointer where to write */
+	ulint	val,	/*!< in: value to write */
+	byte	type,	/*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+	mtr_t*	mtr);	/*!< in: mini-transaction handle */
+/********************************************************//**
+Writes 8 bytes to a file page. Writes the corresponding log
+record to the mini-transaction log, only if mtr is not NULL */
+UNIV_INTERN
+void
+mlog_write_ull(
+/*===========*/
+	byte*		ptr,	/*!< in: pointer where to write */
+	ib_uint64_t	val,	/*!< in: value to write */
+	mtr_t*		mtr);	/*!< in: mini-transaction handle */
+/********************************************************//**
+Writes a string to a file page buffered in the buffer pool. Writes the
+corresponding log record to the mini-transaction log. */
+UNIV_INTERN
+void
+mlog_write_string(
+/*==============*/
+	byte*		ptr,	/*!< in: pointer where to write */
+	const byte*	str,	/*!< in: string to write */
+	ulint		len,	/*!< in: string length */
+	mtr_t*		mtr);	/*!< in: mini-transaction handle */
+/********************************************************//**
+Logs a write of a string to a file page buffered in the buffer pool.
+Writes the corresponding log record to the mini-transaction log. */
+UNIV_INTERN
+void
+mlog_log_string(
+/*============*/
+	byte*	ptr,	/*!< in: pointer written to */
+	ulint	len,	/*!< in: string length */
+	mtr_t*	mtr);	/*!< in: mini-transaction handle */
+/********************************************************//**
+Writes initial part of a log record consisting of one-byte item
+type and four-byte space and page numbers. */
+UNIV_INTERN
+void
+mlog_write_initial_log_record(
+/*==========================*/
+	const byte*	ptr,	/*!< in: pointer to (inside) a buffer
+				frame holding the file page where
+				modification is made */
+	byte		type,	/*!< in: log item type: MLOG_1BYTE, ... */
+	mtr_t*		mtr);	/*!< in: mini-transaction handle */
+/********************************************************//**
+Writes a log record about an .ibd file create/delete/rename.
+@return	new value of log_ptr */
+UNIV_INLINE
+byte*
+mlog_write_initial_log_record_for_file_op(
+/*======================================*/
+	ulint	type,	/*!< in: MLOG_FILE_CREATE, MLOG_FILE_DELETE, or
+			MLOG_FILE_RENAME */
+	ulint	space_id,/*!< in: space id, if applicable */
+	ulint	page_no,/*!< in: page number (not relevant currently) */
+	byte*	log_ptr,/*!< in: pointer to mtr log which has been opened */
+	mtr_t*	mtr);	/*!< in: mtr */
+/********************************************************//**
+Catenates 1, 2 or 4 bytes to the mtr log. The value is not compressed. */
+UNIV_INLINE
+void
+mlog_catenate_ulint(
+/*================*/
+	mtr_t*	mtr,	/*!< in: mtr */
+	ulint	val,	/*!< in: value to write */
+	ulint	type);	/*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+/********************************************************//**
+Catenates n bytes to the mtr log. */
+UNIV_INTERN
+void
+mlog_catenate_string(
+/*=================*/
+	mtr_t*		mtr,	/*!< in: mtr */
+	const byte*	str,	/*!< in: string to write */
+	ulint		len);	/*!< in: string length */
+/********************************************************//**
+Catenates a compressed ulint to mlog. */
+UNIV_INLINE
+void
+mlog_catenate_ulint_compressed(
+/*===========================*/
+	mtr_t*	mtr,	/*!< in: mtr */
+	ulint	val);	/*!< in: value to write */
+/********************************************************//**
+Catenates a compressed 64-bit integer to mlog. */
+UNIV_INLINE
+void
+mlog_catenate_ull_compressed(
+/*=========================*/
+	mtr_t*		mtr,	/*!< in: mtr */
+	ib_uint64_t	val);	/*!< in: value to write */
+/********************************************************//**
+Opens a buffer to mlog. It must be closed with mlog_close.
+@return	buffer, NULL if log mode MTR_LOG_NONE */
+UNIV_INLINE
+byte*
+mlog_open(
+/*======*/
+	mtr_t*	mtr,	/*!< in: mtr */
+	ulint	size);	/*!< in: buffer size in bytes; MUST be
+			smaller than DYN_ARRAY_DATA_SIZE! */
+/********************************************************//**
+Closes a buffer opened to mlog. */
+UNIV_INLINE
+void
+mlog_close(
+/*=======*/
+	mtr_t*	mtr,	/*!< in: mtr */
+	byte*	ptr);	/*!< in: buffer space from ptr up was not used */
+/********************************************************//**
+Writes the initial part of a log record (3..11 bytes).
+If the implementation of this function is changed, all
+size parameters to mlog_open() should be adjusted accordingly!
+@return	new value of log_ptr */
+UNIV_INLINE
+byte*
+mlog_write_initial_log_record_fast(
+/*===============================*/
+	const byte*	ptr,	/*!< in: pointer to (inside) a buffer
+				frame holding the file page where
+				modification is made */
+	byte		type,	/*!< in: log item type: MLOG_1BYTE, ... */
+	byte*		log_ptr,/*!< in: pointer to mtr log which has
+				been opened */
+	mtr_t*		mtr);	/*!< in: mtr */
+#else /* !UNIV_HOTBACKUP */
+# define mlog_write_initial_log_record(ptr,type,mtr) ((void) 0)
+# define mlog_write_initial_log_record_fast(ptr,type,log_ptr,mtr) ((byte*) 0)
+#endif /* !UNIV_HOTBACKUP */
+/********************************************************//**
+Parses an initial log record written by mlog_write_initial_log_record.
+@return	parsed record end, NULL if not a complete record */
+UNIV_INTERN
+byte*
+mlog_parse_initial_log_record(
+/*==========================*/
+	byte*	ptr,	/*!< in: buffer */
+	byte*	end_ptr,/*!< in: buffer end */
+	byte*	type,	/*!< out: log record type: MLOG_1BYTE, ... */
+	ulint*	space,	/*!< out: space id */
+	ulint*	page_no);/*!< out: page number */
+/********************************************************//**
+Parses a log record written by mlog_write_ulint or mlog_write_ull.
+@return	parsed record end, NULL if not a complete record */
+UNIV_INTERN
+byte*
+mlog_parse_nbytes(
+/*==============*/
+	ulint	type,	/*!< in: log record type: MLOG_1BYTE, ... */
+	byte*	ptr,	/*!< in: buffer */
+	byte*	end_ptr,/*!< in: buffer end */
+	byte*	page,	/*!< in: page where to apply the log record, or NULL */
+	void*	page_zip);/*!< in/out: compressed page, or NULL */
+/********************************************************//**
+Parses a log record written by mlog_write_string.
+@return	parsed record end, NULL if not a complete record */
+UNIV_INTERN
+byte*
+mlog_parse_string(
+/*==============*/
+	byte*	ptr,	/*!< in: buffer */
+	byte*	end_ptr,/*!< in: buffer end */
+	byte*	page,	/*!< in: page where to apply the log record, or NULL */
+	void*	page_zip);/*!< in/out: compressed page, or NULL */
+
+#ifndef UNIV_HOTBACKUP
+/********************************************************//**
+Opens a buffer for mlog, writes the initial log record and,
+if needed, the field lengths of an index.  Reserves space
+for further log entries.  The log entry must be closed with
+mlog_close().
+@return	buffer, NULL if log mode MTR_LOG_NONE */
+UNIV_INTERN
+byte*
+mlog_open_and_write_index(
+/*======================*/
+	mtr_t*			mtr,	/*!< in: mtr */
+	const byte*		rec,	/*!< in: index record or page */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	byte			type,	/*!< in: log item type */
+	ulint			size);	/*!< in: requested buffer size in bytes
+					(if 0, calls mlog_close() and
+					returns NULL) */
+#endif /* !UNIV_HOTBACKUP */
+
+/********************************************************//**
+Parses a log record written by mlog_open_and_write_index.
+@return	parsed record end, NULL if not a complete record */
+UNIV_INTERN
+byte*
+mlog_parse_index(
+/*=============*/
+	byte*		ptr,	/*!< in: buffer */
+	const byte*	end_ptr,/*!< in: buffer end */
+	ibool		comp,	/*!< in: TRUE=compact record format */
+	dict_index_t**	index);	/*!< out, own: dummy index */
+
+#ifndef UNIV_HOTBACKUP
+/* Insert, update, and maybe other functions may use this value to define an
+extra mlog buffer size for variable size data */
+#define MLOG_BUF_MARGIN	256
+#endif /* !UNIV_HOTBACKUP */
+
+#ifndef UNIV_NONINL
+#include "mtr0log.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/mtr0log.ic b/storage/innobase/include/mtr0log.ic
new file mode 100644
index 00000000000..3ed4876eeab
--- /dev/null
+++ b/storage/innobase/include/mtr0log.ic
@@ -0,0 +1,276 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/mtr0log.ic
+Mini-transaction logging routines
+
+Created 12/7/1995 Heikki Tuuri
+*******************************************************/
+
+#include "mach0data.h"
+#include "ut0lst.h"
+#include "buf0buf.h"
+#include "buf0dblwr.h"
+#include "fsp0types.h"
+#include "trx0sys.h"
+
+/********************************************************//**
+Opens a buffer of the given size in the log of a mini-transaction and
+flags the mini-transaction as having made modifications. The buffer
+must be closed with mlog_close.
+@return	buffer, NULL if log mode MTR_LOG_NONE */
+UNIV_INLINE
+byte*
+mlog_open(
+/*======*/
+	mtr_t*	mtr,	/*!< in: mtr */
+	ulint	size)	/*!< in: buffer size in bytes; MUST be
+			smaller than DYN_ARRAY_DATA_SIZE! */
+{
+	/* The flag is set regardless of the log mode, that is, also when
+	no buffer is opened below. */
+	mtr->modifications = TRUE;
+
+	if (mtr_get_log_mode(mtr) != MTR_LOG_NONE) {
+
+		return(dyn_array_open(&(mtr->log), size));
+	}
+
+	return(NULL);
+}
+
+/********************************************************//**
+Closes a buffer that was opened in the log of a mini-transaction with
+mlog_open. Must not be called in log mode MTR_LOG_NONE. */
+UNIV_INLINE
+void
+mlog_close(
+/*=======*/
+	mtr_t*	mtr,	/*!< in: mtr */
+	byte*	ptr)	/*!< in: buffer space from ptr up was not used */
+{
+	ut_ad(mtr_get_log_mode(mtr) != MTR_LOG_NONE);
+
+	dyn_array_close(&(mtr->log), ptr);
+}
+
+#ifndef UNIV_HOTBACKUP
+/********************************************************//**
+Appends a 1, 2 or 4 byte value to the mtr log. The value is written
+uncompressed; nothing is written in log mode MTR_LOG_NONE. */
+UNIV_INLINE
+void
+mlog_catenate_ulint(
+/*================*/
+	mtr_t*	mtr,	/*!< in: mtr */
+	ulint	val,	/*!< in: value to write */
+	ulint	type)	/*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+{
+	byte*	dest;
+
+	if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) {
+
+		return;
+	}
+
+	/* The type code is used directly as the number of bytes to
+	reserve, so each code must equal the width it stands for. */
+#if MLOG_1BYTE != 1
+# error "MLOG_1BYTE != 1"
+#endif
+#if MLOG_2BYTES != 2
+# error "MLOG_2BYTES != 2"
+#endif
+#if MLOG_4BYTES != 4
+# error "MLOG_4BYTES != 4"
+#endif
+#if MLOG_8BYTES != 8
+# error "MLOG_8BYTES != 8"
+#endif
+	dest = (byte*) dyn_array_push(&(mtr->log), type);
+
+	switch (type) {
+	case MLOG_4BYTES:
+		mach_write_to_4(dest, val);
+		break;
+	case MLOG_2BYTES:
+		mach_write_to_2(dest, val);
+		break;
+	default:
+		/* Any remaining type is expected to be MLOG_1BYTE */
+		ut_ad(type == MLOG_1BYTE);
+		mach_write_to_1(dest, val);
+	}
+}
+
+/********************************************************//**
+Appends a ulint in compressed form to the mtr log. Nothing is written
+when mlog_open() returns no buffer. */
+UNIV_INLINE
+void
+mlog_catenate_ulint_compressed(
+/*===========================*/
+	mtr_t*	mtr,	/*!< in: mtr */
+	ulint	val)	/*!< in: value to write */
+{
+	byte*	log_ptr = mlog_open(mtr, 10);
+
+	/* A NULL buffer means that no logging is requested */
+	if (log_ptr != NULL) {
+
+		log_ptr += mach_write_compressed(log_ptr, val);
+
+		mlog_close(mtr, log_ptr);
+	}
+}
+
+/********************************************************//**
+Appends a 64-bit integer in compressed form to the mtr log. Nothing is
+written when mlog_open() returns no buffer. */
+UNIV_INLINE
+void
+mlog_catenate_ull_compressed(
+/*=========================*/
+	mtr_t*		mtr,	/*!< in: mtr */
+	ib_uint64_t	val)	/*!< in: value to write */
+{
+	byte*	log_ptr = mlog_open(mtr, 15);
+
+	/* A NULL buffer means that no logging is requested */
+	if (log_ptr != NULL) {
+
+		log_ptr += mach_ull_write_compressed(log_ptr, val);
+
+		mlog_close(mtr, log_ptr);
+	}
+}
+
+/********************************************************//**
+Writes the initial part of a log record (3..11 bytes): the one-byte
+record type, followed by the space id and the page number read from the
+header of the page that contains ptr, both in compressed form.
+Nothing is written for a page in the doublewrite buffer while the
+doublewrite buffer is being created; log_ptr is then returned unchanged.
+If the implementation of this function is changed, all
+size parameters to mlog_open() should be adjusted accordingly!
+@return	new value of log_ptr */
+UNIV_INLINE
+byte*
+mlog_write_initial_log_record_fast(
+/*===============================*/
+	const byte*	ptr,	/*!< in: pointer to (inside) a buffer
+				frame holding the file page where
+				modification is made */
+	byte		type,	/*!< in: log item type: MLOG_1BYTE, ... */
+	byte*		log_ptr,/*!< in: pointer to mtr log which has
+				been opened */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+#ifdef UNIV_DEBUG
+	buf_block_t*	block;
+#endif
+	const byte*	page;
+	ulint		space;
+	ulint		offset;
+
+	ut_ad(mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(type <= MLOG_BIGGEST_TYPE);
+	ut_ad(ptr && log_ptr);
+
+	/* Locate the start of the page frame and read the identity of
+	the page from its header. */
+	page = (const byte*) ut_align_down(ptr, UNIV_PAGE_SIZE);
+	space = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+	offset = mach_read_from_4(page + FIL_PAGE_OFFSET);
+
+	/* check whether the page is in the doublewrite buffer;
+	the doublewrite buffer is located in pages
+	FSP_EXTENT_SIZE, ..., 3 * FSP_EXTENT_SIZE - 1 in the
+	system tablespace */
+	if (space == TRX_SYS_SPACE
+	    && offset >= FSP_EXTENT_SIZE && offset < 3 * FSP_EXTENT_SIZE) {
+		if (buf_dblwr_being_created) {
+			/* Do nothing: we only come to this branch in an
+			InnoDB database creation. We do not redo log
+			anything for the doublewrite buffer pages. */
+			return(log_ptr);
+		} else {
+			/* ulint is not unsigned long on every platform,
+			so the values are cast to match the %lu
+			conversions. */
+			fprintf(stderr,
+				"Error: trying to redo log a record of type "
+				"%d on page %lu of space %lu in the "
+				"doublewrite buffer, continuing anyway.\n"
+				"Please post a bug report to "
+				"bugs.mysql.com.\n",
+				type, (unsigned long) offset,
+				(unsigned long) space);
+			ut_ad(0);
+		}
+	}
+
+	/* Record type, then space id and page number in compressed form */
+	mach_write_to_1(log_ptr, type);
+	log_ptr++;
+	log_ptr += mach_write_compressed(log_ptr, space);
+	log_ptr += mach_write_compressed(log_ptr, offset);
+
+	mtr->n_log_recs++;
+
+#ifdef UNIV_LOG_DEBUG
+	fprintf(stderr,
+		"Adding to mtr log record type %lu space %lu page no %lu\n",
+		(ulong) type, (ulong) space, (ulong) offset);
+#endif
+
+#ifdef UNIV_DEBUG
+	/* We now assume that all x-latched pages have been modified! */
+	block = (buf_block_t*) buf_block_align(ptr);
+
+	if (!mtr_memo_contains(mtr, block, MTR_MEMO_MODIFY)) {
+
+		mtr_memo_push(mtr, block, MTR_MEMO_MODIFY);
+	}
+#endif
+	return(log_ptr);
+}
+
+/********************************************************//**
+Writes the initial part of a log record about an .ibd file
+create/delete/rename: the one-byte record type, followed by the given
+space id and page number in compressed form.
+@return	new value of log_ptr */
+UNIV_INLINE
+byte*
+mlog_write_initial_log_record_for_file_op(
+/*======================================*/
+	ulint	type,	/*!< in: MLOG_FILE_CREATE, MLOG_FILE_DELETE, or
+			MLOG_FILE_RENAME */
+	ulint	space_id,/*!< in: space id, if applicable */
+	ulint	page_no,/*!< in: page number (not relevant currently) */
+	byte*	log_ptr,/*!< in: pointer to mtr log which has been opened */
+	mtr_t*	mtr)	/*!< in: mtr */
+{
+	byte*	end;
+
+	ut_ad(log_ptr);
+
+	mach_write_to_1(log_ptr, type);
+	end = log_ptr + 1;
+
+	/* The space id and the page number are written as given by the
+	caller, even where they are only dummy values. */
+	end += mach_write_compressed(end, space_id);
+	end += mach_write_compressed(end, page_no);
+
+	mtr->n_log_recs++;
+
+	return(end);
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h
new file mode 100644
index 00000000000..ed7fd76d425
--- /dev/null
+++ b/storage/innobase/include/mtr0mtr.h
@@ -0,0 +1,420 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/mtr0mtr.h
+Mini-transaction buffer
+
+Created 11/26/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef mtr0mtr_h
+#define mtr0mtr_h
+
+#include "univ.i"
+#include "mem0mem.h"
+#include "dyn0dyn.h"
+#include "buf0types.h"
+#include "sync0rw.h"
+#include "ut0byte.h"
+#include "mtr0types.h"
+#include "page0types.h"
+
+/* Logging modes for a mini-transaction */
+#define MTR_LOG_ALL		21	/* default mode: log all operations
+					modifying disk-based data */
+#define	MTR_LOG_NONE		22	/* log no operations */
+#define	MTR_LOG_NO_REDO		23	/* Don't generate REDO */
+/*#define	MTR_LOG_SPACE	23 */	/* log only operations modifying
+					file space page allocation data
+					(operations in fsp0fsp.* ) */
+#define	MTR_LOG_SHORT_INSERTS	24	/* inserts are logged in a shorter
+					form */
+
+/* Types for the mlock objects to store in the mtr memo; NOTE that the
+first 3 values must be RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
+#define	MTR_MEMO_PAGE_S_FIX	RW_S_LATCH
+#define	MTR_MEMO_PAGE_X_FIX	RW_X_LATCH
+#define	MTR_MEMO_BUF_FIX	RW_NO_LATCH
+#ifdef UNIV_DEBUG
+# define MTR_MEMO_MODIFY	54
+#endif /* UNIV_DEBUG */
+#define	MTR_MEMO_S_LOCK		55
+#define	MTR_MEMO_X_LOCK		56
+
+/** @name Log item types
+The log items are declared 'byte' so that the compiler can warn if val
+and type parameters are switched in a call to mlog_write_ulint. NOTE!
+For 1 - 8 bytes, the flag value must give the length also! @{ */
+#define	MLOG_SINGLE_REC_FLAG	128		/*!< if the mtr contains only
+						one log record for one page,
+						i.e., write_initial_log_record
+						has been called only once,
+						this flag is ORed to the type
+						of that first log record */
+#define	MLOG_1BYTE		(1)		/*!< one byte is written */
+#define	MLOG_2BYTES		(2)		/*!< 2 bytes ... */
+#define	MLOG_4BYTES		(4)		/*!< 4 bytes ... */
+#define	MLOG_8BYTES		(8)		/*!< 8 bytes ... */
+#define	MLOG_REC_INSERT		((byte)9)	/*!< record insert */
+#define	MLOG_REC_CLUST_DELETE_MARK ((byte)10)	/*!< mark clustered index record
+						deleted */
+#define	MLOG_REC_SEC_DELETE_MARK ((byte)11)	/*!< mark secondary index record
+						deleted */
+#define MLOG_REC_UPDATE_IN_PLACE ((byte)13)	/*!< update of a record,
+						preserves record field sizes */
+#define MLOG_REC_DELETE		((byte)14)	/*!< delete a record from a
+						page */
+#define	MLOG_LIST_END_DELETE	((byte)15)	/*!< delete record list end on
+						index page */
+#define	MLOG_LIST_START_DELETE	((byte)16)	/*!< delete record list start on
+						index page */
+#define	MLOG_LIST_END_COPY_CREATED ((byte)17)	/*!< copy record list end to a
+						new created index page */
+#define	MLOG_PAGE_REORGANIZE	((byte)18)	/*!< reorganize an
+						index page in
+						ROW_FORMAT=REDUNDANT */
+#define MLOG_PAGE_CREATE	((byte)19)	/*!< create an index page */
+#define	MLOG_UNDO_INSERT	((byte)20)	/*!< insert entry in an undo
+						log */
+#define MLOG_UNDO_ERASE_END	((byte)21)	/*!< erase an undo log
+						page end */
+#define	MLOG_UNDO_INIT		((byte)22)	/*!< initialize a page in an
+						undo log */
+#define MLOG_UNDO_HDR_DISCARD	((byte)23)	/*!< discard an update undo log
+						header */
+#define	MLOG_UNDO_HDR_REUSE	((byte)24)	/*!< reuse an insert undo log
+						header */
+#define MLOG_UNDO_HDR_CREATE	((byte)25)	/*!< create an undo
+						log header */
+#define MLOG_REC_MIN_MARK	((byte)26)	/*!< mark an index
+						record as the
+						predefined minimum
+						record */
+#define MLOG_IBUF_BITMAP_INIT	((byte)27)	/*!< initialize an
+						ibuf bitmap page */
+/*#define	MLOG_FULL_PAGE	((byte)28)	full contents of a page */
+#ifdef UNIV_LOG_LSN_DEBUG
+# define MLOG_LSN		((byte)28)	/* current LSN */
+#endif
+#define MLOG_INIT_FILE_PAGE	((byte)29)	/*!< this means that a
+						file page is taken
+						into use and the prior
+						contents of the page
+						should be ignored: in
+						recovery we must not
+						trust the lsn values
+						stored to the file
+						page */
+#define MLOG_WRITE_STRING	((byte)30)	/*!< write a string to
+						a page */
+#define	MLOG_MULTI_REC_END	((byte)31)	/*!< if a single mtr writes
+						several log records,
+						this log record ends the
+						sequence of these records */
+#define MLOG_DUMMY_RECORD	((byte)32)	/*!< dummy log record used to
+						pad a log block full */
+#define MLOG_FILE_CREATE	((byte)33)	/*!< log record about an .ibd
+						file creation */
+#define MLOG_FILE_RENAME	((byte)34)	/*!< log record about an .ibd
+						file rename */
+#define MLOG_FILE_DELETE	((byte)35)	/*!< log record about an .ibd
+						file deletion */
+#define MLOG_COMP_REC_MIN_MARK	((byte)36)	/*!< mark a compact
+						index record as the
+						predefined minimum
+						record */
+#define MLOG_COMP_PAGE_CREATE	((byte)37)	/*!< create a compact
+						index page */
+#define MLOG_COMP_REC_INSERT	((byte)38)	/*!< compact record insert */
+#define MLOG_COMP_REC_CLUST_DELETE_MARK ((byte)39)
+						/*!< mark compact
+						clustered index record
+						deleted */
+#define MLOG_COMP_REC_SEC_DELETE_MARK ((byte)40)/*!< mark compact
+						secondary index record
+						deleted; this log
+						record type is
+						redundant, as
+						MLOG_REC_SEC_DELETE_MARK
+						is independent of the
+						record format. */
+#define MLOG_COMP_REC_UPDATE_IN_PLACE ((byte)41)/*!< update of a
+						compact record,
+						preserves record field
+						sizes */
+#define MLOG_COMP_REC_DELETE	((byte)42)	/*!< delete a compact record
+						from a page */
+#define MLOG_COMP_LIST_END_DELETE ((byte)43)	/*!< delete compact record list
+						end on index page */
+#define MLOG_COMP_LIST_START_DELETE ((byte)44)	/*!< delete compact record list
+						start on index page */
+#define MLOG_COMP_LIST_END_COPY_CREATED ((byte)45)
+						/*!< copy compact
+						record list end to a
+						new created index
+						page */
+#define MLOG_COMP_PAGE_REORGANIZE ((byte)46)	/*!< reorganize an index page */
+#define MLOG_FILE_CREATE2	((byte)47)	/*!< log record about creating
+						an .ibd file, with format */
+#define MLOG_ZIP_WRITE_NODE_PTR	((byte)48)	/*!< write the node pointer of
+						a record on a compressed
+						non-leaf B-tree page */
+#define MLOG_ZIP_WRITE_BLOB_PTR	((byte)49)	/*!< write the BLOB pointer
+						of an externally stored column
+						on a compressed page */
+#define MLOG_ZIP_WRITE_HEADER	((byte)50)	/*!< write to compressed page
+						header */
+#define MLOG_ZIP_PAGE_COMPRESS	((byte)51)	/*!< compress an index page */
+#define MLOG_ZIP_PAGE_COMPRESS_NO_DATA	((byte)52)/*!< compress an index page
+						without logging its image */
+#define MLOG_ZIP_PAGE_REORGANIZE ((byte)53)	/*!< reorganize a compressed
+						page */
+#define MLOG_BIGGEST_TYPE	((byte)53)	/*!< biggest value (used in
+						assertions) */
+/* @} */
+
+/** @name Flags for MLOG_FILE operations
+(stored in the page number parameter, called log_flags in the
+functions).  The page number parameter was originally written as 0. @{ */
+#define MLOG_FILE_FLAG_TEMP	1	/*!< identifies TEMPORARY TABLE in
+					MLOG_FILE_CREATE, MLOG_FILE_CREATE2 */
+/* @} */
+
+/* included here because it needs MLOG_LSN defined */
+#include "log0log.h"
+
+/***************************************************************//**
+Starts a mini-transaction. */
+UNIV_INLINE
+void
+mtr_start(
+/*======*/
+	mtr_t*	mtr)	/*!< out: mini-transaction */
+	__attribute__((nonnull));
+/***************************************************************//**
+Commits a mini-transaction. */
+UNIV_INTERN
+void
+mtr_commit(
+/*=======*/
+	mtr_t*	mtr)	/*!< in/out: mini-transaction */
+	__attribute__((nonnull));
+/**********************************************************//**
+Sets and returns a savepoint in mtr.
+@return	savepoint */
+UNIV_INLINE
+ulint
+mtr_set_savepoint(
+/*==============*/
+	mtr_t*	mtr);	/*!< in: mtr */
+#ifndef UNIV_HOTBACKUP
+/**********************************************************//**
+Releases the (index tree) s-latch stored in an mtr memo after a
+savepoint. */
+UNIV_INLINE
+void
+mtr_release_s_latch_at_savepoint(
+/*=============================*/
+	mtr_t*		mtr,		/*!< in: mtr */
+	ulint		savepoint,	/*!< in: savepoint */
+	rw_lock_t*	lock);		/*!< in: latch to release */
+#else /* !UNIV_HOTBACKUP */
+# define mtr_release_s_latch_at_savepoint(mtr,savepoint,lock) ((void) 0)
+#endif /* !UNIV_HOTBACKUP */
+/***************************************************************//**
+Gets the logging mode of a mini-transaction.
+@return	logging mode: MTR_LOG_NONE, ... */
+UNIV_INLINE
+ulint
+mtr_get_log_mode(
+/*=============*/
+	mtr_t*	mtr);	/*!< in: mtr */
+/***************************************************************//**
+Changes the logging mode of a mini-transaction.
+@return	old mode */
+UNIV_INLINE
+ulint
+mtr_set_log_mode(
+/*=============*/
+	mtr_t*	mtr,	/*!< in: mtr */
+	ulint	mode);	/*!< in: logging mode: MTR_LOG_NONE, ... */
+/********************************************************//**
+Reads 1 - 4 bytes from a file page buffered in the buffer pool.
+@return	value read */
+UNIV_INTERN
+ulint
+mtr_read_ulint(
+/*===========*/
+	const byte*	ptr,	/*!< in: pointer from where to read */
+	ulint		type,	/*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+	mtr_t*		mtr);	/*!< in: mini-transaction handle */
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+This macro locks an rw-lock in s-mode. */
+#define mtr_s_lock(B, MTR)	mtr_s_lock_func((B), __FILE__, __LINE__,\
+						(MTR))
+/*********************************************************************//**
+This macro locks an rw-lock in x-mode. */
+#define mtr_x_lock(B, MTR)	mtr_x_lock_func((B), __FILE__, __LINE__,\
+						(MTR))
+/*********************************************************************//**
+NOTE! Use the macro above!
+Locks a lock in s-mode. */
+UNIV_INLINE
+void
+mtr_s_lock_func(
+/*============*/
+	rw_lock_t*	lock,	/*!< in: rw-lock */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line number */
+	mtr_t*		mtr);	/*!< in: mtr */
+/*********************************************************************//**
+NOTE! Use the macro above!
+Locks a lock in x-mode. */
+UNIV_INLINE
+void
+mtr_x_lock_func(
+/*============*/
+	rw_lock_t*	lock,	/*!< in: rw-lock */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line number */
+	mtr_t*		mtr);	/*!< in: mtr */
+#endif /* !UNIV_HOTBACKUP */
+
+/***************************************************//**
+Releases an object in the memo stack.
+@return true if released */
+UNIV_INTERN
+bool
+mtr_memo_release(
+/*=============*/
+	mtr_t*	mtr,	/*!< in/out: mini-transaction */
+	void*	object,	/*!< in: object */
+	ulint	type)	/*!< in: object type: MTR_MEMO_S_LOCK, ... */
+	__attribute__((nonnull));
+#ifdef UNIV_DEBUG
+# ifndef UNIV_HOTBACKUP
+/**********************************************************//**
+Checks if memo contains the given item.
+@return	TRUE if contains */
+UNIV_INLINE
+bool
+mtr_memo_contains(
+/*==============*/
+	mtr_t*		mtr,	/*!< in: mtr */
+	const void*	object,	/*!< in: object to search */
+	ulint		type)	/*!< in: type of object */
+	__attribute__((warn_unused_result, nonnull));
+
+/**********************************************************//**
+Checks if memo contains the given page.
+@return	TRUE if contains */
+UNIV_INTERN
+ibool
+mtr_memo_contains_page(
+/*===================*/
+	mtr_t*		mtr,	/*!< in: mtr */
+	const byte*	ptr,	/*!< in: pointer to buffer frame */
+	ulint		type);	/*!< in: type of object */
+/*********************************************************//**
+Prints info of an mtr handle. */
+UNIV_INTERN
+void
+mtr_print(
+/*======*/
+	mtr_t*	mtr);	/*!< in: mtr */
+# else /* !UNIV_HOTBACKUP */
+#  define mtr_memo_contains(mtr, object, type)		TRUE
+#  define mtr_memo_contains_page(mtr, ptr, type)	TRUE
+# endif /* !UNIV_HOTBACKUP */
+#endif /* UNIV_DEBUG */
+/*######################################################################*/
+
+#define	MTR_BUF_MEMO_SIZE	200	/* number of slots in memo */
+
+/***************************************************************//**
+Returns the log object of a mini-transaction buffer.
+@return	log */
+UNIV_INLINE
+dyn_array_t*
+mtr_get_log(
+/*========*/
+	mtr_t*	mtr);	/*!< in: mini-transaction */
+/***************************************************//**
+Pushes an object to an mtr memo stack. */
+UNIV_INLINE
+void
+mtr_memo_push(
+/*==========*/
+	mtr_t*	mtr,	/*!< in: mtr */
+	void*	object,	/*!< in: object */
+	ulint	type);	/*!< in: object type: MTR_MEMO_S_LOCK, ... */
+
+/** Mini-transaction memo stack slot: one (type, object) pair. */
+struct mtr_memo_slot_t{
+	ulint	type;	/*!< type of the stored object (MTR_MEMO_S_LOCK, ...) */
+	void*	object;	/*!< pointer to the object; NULL in a cleared slot */
+};
+
+/** Mini-transaction handle and buffer */
+struct mtr_t{
+#ifdef UNIV_DEBUG
+	ulint		state;	/*!< MTR_ACTIVE, MTR_COMMITTING, MTR_COMMITTED */
+#endif
+	dyn_array_t	memo;	/*!< memo stack for locks etc. */
+	dyn_array_t	log;	/*!< mini-transaction log */
+	unsigned	inside_ibuf:1;
+				/*!< TRUE if inside ibuf changes */
+	unsigned	modifications:1;
+				/*!< TRUE if the mini-transaction
+				modified buffer pool pages */
+	unsigned	made_dirty:1;
+				/*!< TRUE if mtr has made at least
+				one buffer pool page dirty */
+	ulint		n_log_recs;
+				/*!< count of how many page initial log records
+				have been written to the mtr log */
+	ulint		n_freed_pages;
+				/*!< number of pages that have been freed in
+				this mini-transaction */
+	ulint		log_mode; /*!< specifies which operations should be
+				logged; default value MTR_LOG_ALL */
+	lsn_t		start_lsn;/*!< start lsn of the possible log entry for
+				this mtr */
+	lsn_t		end_lsn;/*!< end lsn of the possible log entry for
+				this mtr */
+#ifdef UNIV_DEBUG
+	ulint		magic_n;	/*!< set to MTR_MAGIC_N by mtr_start() */
+#endif /* UNIV_DEBUG */
+};
+
+#ifdef UNIV_DEBUG
+# define MTR_MAGIC_N		54551
+#endif /* UNIV_DEBUG */
+
+#define MTR_ACTIVE		12231
+#define MTR_COMMITTING		56456
+#define MTR_COMMITTED		34676
+
+#ifndef UNIV_NONINL
+#include "mtr0mtr.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/mtr0mtr.ic b/storage/innobase/include/mtr0mtr.ic
new file mode 100644
index 00000000000..a9f02430220
--- /dev/null
+++ b/storage/innobase/include/mtr0mtr.ic
@@ -0,0 +1,296 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/mtr0mtr.ic
+Mini-transaction buffer
+
+Created 11/26/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef UNIV_HOTBACKUP
+# include "sync0sync.h"
+# include "sync0rw.h"
+#endif /* !UNIV_HOTBACKUP */
+#include "mach0data.h"
+
+/***************************************************//**
+Checks if a mini-transaction is dirtying a clean page.
+@return TRUE if the mtr is dirtying a clean page. */
+UNIV_INTERN
+ibool
+mtr_block_dirtied(
+/*==============*/
+	const buf_block_t*	block)	/*!< in: block being x-fixed */
+	__attribute__((nonnull,warn_unused_result));
+
+/***************************************************************//**
+Starts a mini-transaction: creates its memo and log and resets its state. */
+UNIV_INLINE
+void
+mtr_start(
+/*======*/
+	mtr_t*	mtr)	/*!< out: mini-transaction */
+{
+	UNIV_MEM_INVALID(mtr, sizeof *mtr);
+
+	dyn_array_create(&(mtr->memo));
+	dyn_array_create(&(mtr->log));
+	/* Defaults; note that start_lsn and end_lsn are not assigned here */
+	mtr->log_mode = MTR_LOG_ALL;
+	mtr->inside_ibuf = FALSE;
+	mtr->modifications = FALSE;
+	mtr->made_dirty = FALSE;
+	mtr->n_log_recs = 0;
+	mtr->n_freed_pages = 0;
+	/* state and magic_n exist in mtr_t only when UNIV_DEBUG is defined */
+	ut_d(mtr->state = MTR_ACTIVE);
+	ut_d(mtr->magic_n = MTR_MAGIC_N);
+}
+
+/***************************************************//**
+Pushes an (object, type) pair to the memo stack of a mini-transaction. */
+UNIV_INLINE
+void
+mtr_memo_push(
+/*==========*/
+	mtr_t*	mtr,	/*!< in/out: mtr; made_dirty may get set */
+	void*	object,	/*!< in: object, must not be NULL */
+	ulint	type)	/*!< in: object type: MTR_MEMO_S_LOCK, ... */
+{
+	dyn_array_t*		memo;
+	mtr_memo_slot_t*	slot;
+
+	ut_ad(object);
+	ut_ad(type >= MTR_MEMO_PAGE_S_FIX);
+	ut_ad(type <= MTR_MEMO_X_LOCK);
+	ut_ad(mtr);
+	ut_ad(mtr->magic_n == MTR_MAGIC_N);
+	ut_ad(mtr->state == MTR_ACTIVE);
+
+	/* If this mtr has x-fixed a clean page then we set
+	the made_dirty flag. This tells us if we need to
+	grab log_flush_order_mutex at mtr_commit so that we
+	can insert the dirtied page to the flush list. */
+	if (type == MTR_MEMO_PAGE_X_FIX && !mtr->made_dirty) {
+		mtr->made_dirty =
+			mtr_block_dirtied((const buf_block_t*) object);
+	}
+
+	memo = &(mtr->memo);
+	/* Reserve room for one more slot in the memo and fill it in */
+	slot = (mtr_memo_slot_t*) dyn_array_push(memo, sizeof *slot);
+
+	slot->object = object;
+	slot->type = type;
+}
+
+/**********************************************************//**
+Returns a savepoint for mtr; the mtr itself is not modified.
+@return	savepoint: the current data size of the mtr memo */
+UNIV_INLINE
+ulint
+mtr_set_savepoint(
+/*==============*/
+	mtr_t*	mtr)	/*!< in: mtr */
+{
+	dyn_array_t*	memo;
+
+	ut_ad(mtr);
+	ut_ad(mtr->magic_n == MTR_MAGIC_N);
+	ut_ad(mtr->state == MTR_ACTIVE);
+
+	memo = &(mtr->memo);
+	/* The savepoint is the amount of data held in the memo right now */
+	return(dyn_array_get_data_size(memo));
+}
+
+#ifndef UNIV_HOTBACKUP
+/**********************************************************//**
+Releases the (index tree) s-latch stored in the mtr memo slot found at
+the given savepoint, and clears that slot. */
+UNIV_INLINE
+void
+mtr_release_s_latch_at_savepoint(
+/*=============================*/
+	mtr_t*		mtr,		/*!< in/out: mtr */
+	ulint		savepoint,	/*!< in: savepoint; locates the slot */
+	rw_lock_t*	lock)		/*!< in: latch to release */
+{
+	mtr_memo_slot_t* slot;
+	dyn_array_t*	memo;
+
+	ut_ad(mtr);
+	ut_ad(mtr->magic_n == MTR_MAGIC_N);
+	ut_ad(mtr->state == MTR_ACTIVE);
+
+	memo = &(mtr->memo);
+
+	ut_ad(dyn_array_get_data_size(memo) > savepoint);
+
+	slot = (mtr_memo_slot_t*) dyn_array_get_element(memo, savepoint);
+
+	ut_ad(slot->object == lock);
+	ut_ad(slot->type == MTR_MEMO_S_LOCK);
+
+	rw_lock_s_unlock(lock);
+	/* The latch is released; leave no pointer to it in the memo */
+	slot->object = NULL;
+}
+
+# ifdef UNIV_DEBUG
+/**********************************************************//**
+Checks if memo contains the given item.
+@return	true if a slot with this object and this type is found */
+UNIV_INLINE
+bool
+mtr_memo_contains(
+/*==============*/
+	mtr_t*		mtr,	/*!< in: mtr */
+	const void*	object,	/*!< in: object to search */
+	ulint		type)	/*!< in: type of object */
+{
+	ut_ad(mtr);
+	ut_ad(mtr->magic_n == MTR_MAGIC_N);
+	ut_ad(mtr->state == MTR_ACTIVE || mtr->state == MTR_COMMITTING);
+	/* Visit the memo blocks starting from the last one */
+	for (const dyn_block_t* block = dyn_array_get_last_block(&mtr->memo);
+	     block;
+	     block = dyn_array_get_prev_block(&mtr->memo, block)) {
+		const mtr_memo_slot_t*	start
+			= reinterpret_cast<mtr_memo_slot_t*>(
+				dyn_block_get_data(block));
+		mtr_memo_slot_t*	slot
+			= reinterpret_cast<mtr_memo_slot_t*>(
+				dyn_block_get_data(block)
+				+ dyn_block_get_used(block));
+		/* The used part of a block must be a whole number of slots */
+		ut_ad(!(dyn_block_get_used(block) % sizeof(mtr_memo_slot_t)));
+		/* Step backwards from one past the last used slot to start */
+		while (slot-- != start) {
+			if (object == slot->object && type == slot->type) {
+				return(true);
+			}
+		}
+	}
+
+	return(false);
+}
+# endif /* UNIV_DEBUG */
+#endif /* !UNIV_HOTBACKUP */
+
+/***************************************************************//**
+Returns the log object of a mini-transaction buffer.
+@return	pointer to the log member of mtr */
+UNIV_INLINE
+dyn_array_t*
+mtr_get_log(
+/*========*/
+	mtr_t*	mtr)	/*!< in: mini-transaction */
+{
+	ut_ad(mtr);
+	ut_ad(mtr->magic_n == MTR_MAGIC_N);
+
+	return(&(mtr->log));
+}
+
+/***************************************************************//**
+Gets the logging mode of a mini-transaction.
+@return	logging mode: one of MTR_LOG_ALL ... MTR_LOG_SHORT_INSERTS */
+UNIV_INLINE
+ulint
+mtr_get_log_mode(
+/*=============*/
+	mtr_t*	mtr)	/*!< in: mtr */
+{
+	ut_ad(mtr);
+	ut_ad(mtr->log_mode >= MTR_LOG_ALL);
+	ut_ad(mtr->log_mode <= MTR_LOG_SHORT_INSERTS);
+
+	return(mtr->log_mode);
+}
+
+/***************************************************************//**
+Changes the logging mode of a mini-transaction.
+@return	the mode that was in effect before the call */
+UNIV_INLINE
+ulint
+mtr_set_log_mode(
+/*=============*/
+	mtr_t*	mtr,	/*!< in/out: mtr */
+	ulint	mode)	/*!< in: logging mode: MTR_LOG_NONE, ... */
+{
+	ulint	old_mode;
+
+	ut_ad(mtr);
+	ut_ad(mode >= MTR_LOG_ALL);
+	ut_ad(mode <= MTR_LOG_SHORT_INSERTS);
+
+	old_mode = mtr->log_mode;
+
+	if ((mode == MTR_LOG_SHORT_INSERTS) && (old_mode == MTR_LOG_NONE)) {
+		/* Do nothing: MTR_LOG_NONE stays in effect */
+	} else {
+		mtr->log_mode = mode;
+	}
+
+	ut_ad(old_mode >= MTR_LOG_ALL);
+	ut_ad(old_mode <= MTR_LOG_SHORT_INSERTS);
+
+	return(old_mode);
+}
+
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+Locks an rw-lock in s-mode and pushes it to the mtr memo as MTR_MEMO_S_LOCK. */
+UNIV_INLINE
+void
+mtr_s_lock_func(
+/*============*/
+	rw_lock_t*	lock,	/*!< in: rw-lock */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line number */
+	mtr_t*		mtr)	/*!< in/out: mtr */
+{
+	ut_ad(mtr);
+	ut_ad(lock);
+	/* NOTE(review): the 0 is presumably the 'pass' value; confirm in sync0rw */
+	rw_lock_s_lock_inline(lock, 0, file, line);
+
+	mtr_memo_push(mtr, lock, MTR_MEMO_S_LOCK);
+}
+
+/*********************************************************************//**
+Locks an rw-lock in x-mode and pushes it to the mtr memo as MTR_MEMO_X_LOCK. */
+UNIV_INLINE
+void
+mtr_x_lock_func(
+/*============*/
+	rw_lock_t*	lock,	/*!< in: rw-lock */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line number */
+	mtr_t*		mtr)	/*!< in/out: mtr */
+{
+	ut_ad(mtr);
+	ut_ad(lock);
+	/* NOTE(review): the 0 is presumably the 'pass' value; confirm in sync0rw */
+	rw_lock_x_lock_inline(lock, 0, file, line);
+
+	mtr_memo_push(mtr, lock, MTR_MEMO_X_LOCK);
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/mtr0types.h b/storage/innobase/include/mtr0types.h
new file mode 100644
index 00000000000..43368c0b726
--- /dev/null
+++ b/storage/innobase/include/mtr0types.h
@@ -0,0 +1,31 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/mtr0types.h
+Mini-transaction buffer global types
+
+Created 11/26/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef mtr0types_h
+#define mtr0types_h
+
+struct mtr_t;
+
+#endif
diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
new file mode 100644
index 00000000000..ad9b6a9ac10
--- /dev/null
+++ b/storage/innobase/include/os0file.h
@@ -0,0 +1,1289 @@
+/***********************************************************************
+
+Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2009, Percona Inc.
+
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+***********************************************************************/
+
+/**************************************************//**
+@file include/os0file.h
+The interface to the operating system file io
+
+Created 10/21/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef os0file_h
+#define os0file_h
+
+#include "univ.i"
+
+#ifndef __WIN__
+#include <dirent.h>
+#include <sys/stat.h>
+#include <time.h>
+#endif
+
+/** File node of a tablespace or the log data space */
+struct fil_node_t;
+
+extern ibool	os_has_said_disk_full;
+/** Flag: enable debug printout for asynchronous i/o */
+extern ibool	os_aio_print_debug;
+
+/** Number of pending os_file_pread() operations */
+extern ulint	os_file_n_pending_preads;
+/** Number of pending os_file_pwrite() operations */
+extern ulint	os_file_n_pending_pwrites;
+
+/** Number of pending read operations */
+extern ulint	os_n_pending_reads;
+/** Number of pending write operations */
+extern ulint	os_n_pending_writes;
+
+#ifdef __WIN__
+
+/** We always define WIN_ASYNC_IO, and check at run-time whether
+   the OS actually supports it: Win 95 does not, NT does. */
+#define WIN_ASYNC_IO
+
+/** Use unbuffered I/O */
+#define UNIV_NON_BUFFERED_IO
+
+#endif
+
+/** File offset in bytes */
+typedef ib_uint64_t os_offset_t;
+#ifdef __WIN__
+/** File handle */
+# define os_file_t	HANDLE
+/** Convert a C file descriptor to a native file handle
+@param fd	file descriptor
+@return		native file handle */
+# define OS_FILE_FROM_FD(fd) (HANDLE) _get_osfhandle(fd)
+#else
+/** File handle */
+typedef int	os_file_t;
+/** Convert a C file descriptor to a native file handle
+@param fd	file descriptor
+@return		native file handle */
+# define OS_FILE_FROM_FD(fd) fd
+#endif
+
+/** Umask for creating files */
+extern ulint	os_innodb_umask;
+
+/** The next value should be smaller than or equal to the smallest sector size used
+on any disk. A log block is required to be a portion of disk which is written
+so that if the start and the end of a block get written to disk, then the
+whole block gets written. This should be true even in most cases of a crash:
+if this fails for a log block, then it is equivalent to a media failure in the
+log. */
+
+#define OS_FILE_LOG_BLOCK_SIZE		512
+
+/** Options for os_file_create_func @{ */
+enum os_file_create_t {
+	OS_FILE_OPEN = 51,		/*!< to open an existing file (error
+					if it does not exist) */
+	OS_FILE_CREATE,			/*!< to create a new file (error
+					if it already exists) */
+	OS_FILE_OVERWRITE,		/*!< to create a new file; if it
+					exists, overwrite the old file */
+	OS_FILE_OPEN_RAW,		/*!< to open a raw device or disk
+					partition */
+	OS_FILE_CREATE_PATH,		/*!< to create the directories */
+	OS_FILE_OPEN_RETRY,		/*!< open with retry */
+
+	/** Flags that can be combined with the above values. Please ensure
+	that the above values stay below 128. */
+
+	OS_FILE_ON_ERROR_NO_EXIT = 128,	/*!< do not exit on unknown errors */
+	OS_FILE_ON_ERROR_SILENT = 256	/*!< don't print diagnostic messages to
+					the log unless it is a fatal error;
+					this flag is only used if
+					ON_ERROR_NO_EXIT is set */
+};
+
+#define OS_FILE_READ_ONLY		333
+#define	OS_FILE_READ_WRITE		444
+#define	OS_FILE_READ_ALLOW_DELETE	555	/* for mysqlbackup */
+
+/* Options for file_create */
+#define	OS_FILE_AIO			61
+#define	OS_FILE_NORMAL			62
+/* @} */
+
+/** Types for file create @{ */
+#define	OS_DATA_FILE			100
+#define OS_LOG_FILE			101
+/* @} */
+
+/** Error codes from os_file_get_last_error @{ */
+#define	OS_FILE_NOT_FOUND		71
+#define	OS_FILE_DISK_FULL		72
+#define	OS_FILE_ALREADY_EXISTS		73
+#define	OS_FILE_PATH_ERROR		74
+#define	OS_FILE_AIO_RESOURCES_RESERVED	75	/* wait for OS aio resources
+						to become available again */
+#define	OS_FILE_SHARING_VIOLATION	76
+#define	OS_FILE_ERROR_NOT_SPECIFIED	77
+#define	OS_FILE_INSUFFICIENT_RESOURCE	78
+#define	OS_FILE_AIO_INTERRUPTED		79
+#define	OS_FILE_OPERATION_ABORTED	80
+
+#define	OS_FILE_ACCESS_VIOLATION	81
+
+#define	OS_FILE_ERROR_MAX		100
+/* @} */
+
+/** Types for aio operations @{ */
+#define OS_FILE_READ	10
+#define OS_FILE_WRITE	11
+
+#define OS_FILE_LOG	256	/* This can be ORed to type */
+/* @} */
+
+#define OS_AIO_N_PENDING_IOS_PER_THREAD 32	/*!< Win NT does not allow more
+						than 64 */
+
+/** Modes for aio operations @{ */
+#define OS_AIO_NORMAL	21	/*!< Normal asynchronous i/o not for ibuf
+				pages or ibuf bitmap pages */
+#define OS_AIO_IBUF	22	/*!< Asynchronous i/o for ibuf pages or ibuf
+				bitmap pages */
+#define OS_AIO_LOG	23	/*!< Asynchronous i/o for the log */
+#define OS_AIO_SYNC	24	/*!< Asynchronous i/o where the calling thread
+				will itself wait for the i/o to complete,
+				doing also the job of the i/o-handler thread;
+				can be used for any pages, ibuf or non-ibuf.
+				This is used to save CPU time, as we can do
+				with fewer thread switches. Plain synchronous
+				i/o is not as good, because it must serialize
+				the file seek and read or write, causing a
+				bottleneck for parallelism. */
+
+#define OS_AIO_SIMULATED_WAKE_LATER	512 /*!< This can be ORed to mode
+				in the call of os_aio(...),
+				if the caller wants to post several i/o
+				requests in a batch, and only after that
+				wake the i/o-handler thread; this has
+				effect only in simulated aio */
+/* @} */
+
+#define OS_WIN31	1	/*!< Microsoft Windows 3.x */
+#define OS_WIN95	2	/*!< Microsoft Windows 95 */
+#define OS_WINNT	3	/*!< Microsoft Windows NT 3.x */
+#define OS_WIN2000	4	/*!< Microsoft Windows 2000 */
+#define OS_WINXP	5	/*!< Microsoft Windows XP
+				or Windows Server 2003 */
+#define OS_WINVISTA	6	/*!< Microsoft Windows Vista
+				or Windows Server 2008 */
+#define OS_WIN7		7	/*!< Microsoft Windows 7
+				or Windows Server 2008 R2 */
+
+
+extern ulint	os_n_file_reads;
+extern ulint	os_n_file_writes;
+extern ulint	os_n_fsyncs;
+
+#ifdef UNIV_PFS_IO
+/* Keys to register InnoDB I/O with performance schema */
+extern mysql_pfs_key_t	innodb_file_data_key;
+extern mysql_pfs_key_t	innodb_file_log_key;
+extern mysql_pfs_key_t	innodb_file_temp_key;
+
+/* The following six macros are instrumentation hooks that register
+various file I/O operations with performance schema.
+1) register_pfs_file_open_begin() and register_pfs_file_open_end() are
+used to register file creation, opening, closing and renaming.
+2) register_pfs_file_io_begin() and register_pfs_file_io_end() are
+used to register actual file read, write and flush.
+3) register_pfs_file_close_begin() and register_pfs_file_close_end()
+are used to register file deletion operations. */
+# define register_pfs_file_open_begin(state, locker, key, op, name,	\
+				      src_file, src_line)		\
+do {									\
+	locker = PSI_FILE_CALL(get_thread_file_name_locker)(		\
+		state, key, op, name, &locker);				\
+	if (UNIV_LIKELY(locker != NULL)) {				\
+		PSI_FILE_CALL(start_file_open_wait)(			\
+			locker, src_file, src_line);			\
+	}								\
+} while (0)
+
+# define register_pfs_file_open_end(locker, file)			\
+do {									\
+	if (UNIV_LIKELY(locker != NULL)) {				\
+		PSI_FILE_CALL(end_file_open_wait_and_bind_to_descriptor)(\
+			locker, file);					\
+	}								\
+} while (0)
+
+# define register_pfs_file_close_begin(state, locker, key, op, name,	\
+				      src_file, src_line)		\
+do {									\
+	locker = PSI_FILE_CALL(get_thread_file_name_locker)(		\
+		state, key, op, name, &locker);				\
+	if (UNIV_LIKELY(locker != NULL)) {				\
+		PSI_FILE_CALL(start_file_close_wait)(			\
+			locker, src_file, src_line);			\
+	}								\
+} while (0)
+
+# define register_pfs_file_close_end(locker, result)			\
+do {									\
+	if (UNIV_LIKELY(locker != NULL)) {				\
+		PSI_FILE_CALL(end_file_close_wait)(			\
+			locker, result);				\
+	}								\
+} while (0)
+
+# define register_pfs_file_io_begin(state, locker, file, count, op,	\
+				    src_file, src_line)			\
+do {									\
+	locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)(	\
+		state, file, op);					\
+	if (UNIV_LIKELY(locker != NULL)) {				\
+		PSI_FILE_CALL(start_file_wait)(				\
+			locker, count, src_file, src_line);		\
+	}								\
+} while (0)
+
+# define register_pfs_file_io_end(locker, count)			\
+do {									\
+	if (UNIV_LIKELY(locker != NULL)) {				\
+		PSI_FILE_CALL(end_file_wait)(locker, count);		\
+	}								\
+} while (0)
+#endif /* UNIV_PFS_IO  */
+
+/* Following macros/functions are file I/O APIs that would be performance
+schema instrumented if "UNIV_PFS_IO" is defined. They would point to
+wrapper functions with performance schema instrumentation in such case.
+
+os_file_create
+os_file_create_simple
+os_file_create_simple_no_error_handling
+os_file_close, os_file_flush
+os_file_rename, os_file_delete, os_file_delete_if_exists
+os_aio
+os_file_read
+os_file_read_no_error_handling
+os_file_write
+
+The wrapper functions are named "pfs_" + <API name> + "_func". */
+
+#ifdef UNIV_PFS_IO
+# define os_file_create(key, name, create, purpose, type, success)	\
+	pfs_os_file_create_func(key, name, create, purpose,	type,	\
+				success, __FILE__, __LINE__)
+
+# define os_file_create_simple(key, name, create, access, success)	\
+	pfs_os_file_create_simple_func(key, name, create, access,	\
+				       success, __FILE__, __LINE__)
+
+# define os_file_create_simple_no_error_handling(			\
+		key, name, create_mode, access, success)		\
+	pfs_os_file_create_simple_no_error_handling_func(		\
+		key, name, create_mode, access, success, __FILE__, __LINE__)
+
+# define os_file_close(file)						\
+	pfs_os_file_close_func(file, __FILE__, __LINE__)
+
+# define os_aio(type, mode, name, file, buf, offset,			\
+		n, message1, message2)					\
+	pfs_os_aio_func(type, mode, name, file, buf, offset,		\
+			n, message1, message2, __FILE__, __LINE__)
+
+# define os_file_read(file, buf, offset, n)				\
+	pfs_os_file_read_func(file, buf, offset, n, __FILE__, __LINE__)
+
+# define os_file_read_no_error_handling(file, buf, offset, n)		\
+	pfs_os_file_read_no_error_handling_func(file, buf, offset, n,	\
+						__FILE__, __LINE__)
+
+# define os_file_write(name, file, buf, offset, n)	\
+	pfs_os_file_write_func(name, file, buf, offset,	\
+			       n, __FILE__, __LINE__)
+
+# define os_file_flush(file)						\
+	pfs_os_file_flush_func(file, __FILE__, __LINE__)
+
+# define os_file_rename(key, oldpath, newpath)				\
+	pfs_os_file_rename_func(key, oldpath, newpath, __FILE__, __LINE__)
+
+# define os_file_delete(key, name)					\
+	pfs_os_file_delete_func(key, name, __FILE__, __LINE__)
+
+# define os_file_delete_if_exists(key, name)				\
+	pfs_os_file_delete_if_exists_func(key, name, __FILE__, __LINE__)
+#else /* UNIV_PFS_IO */
+
+/* If UNIV_PFS_IO is not defined, these I/O APIs point
+to original un-instrumented file I/O APIs */
+# define os_file_create(key, name, create, purpose, type, success)	\
+	os_file_create_func(name, create, purpose, type, success)
+
+# define os_file_create_simple(key, name, create_mode, access, success)	\
+	os_file_create_simple_func(name, create_mode, access, success)
+
+# define os_file_create_simple_no_error_handling(			\
+		key, name, create_mode, access, success)		\
+	os_file_create_simple_no_error_handling_func(			\
+		name, create_mode, access, success)
+
+# define os_file_close(file)	os_file_close_func(file)
+
+# define os_aio(type, mode, name, file, buf, offset, n, message1, message2) \
+	os_aio_func(type, mode, name, file, buf, offset, n,		\
+		    message1, message2)
+
+# define os_file_read(file, buf, offset, n)	\
+	os_file_read_func(file, buf, offset, n)
+
+# define os_file_read_no_error_handling(file, buf, offset, n)		\
+	os_file_read_no_error_handling_func(file, buf, offset, n)
+
+# define os_file_write(name, file, buf, offset, n)			\
+	os_file_write_func(name, file, buf, offset, n)
+
+# define os_file_flush(file)	os_file_flush_func(file)
+
+# define os_file_rename(key, oldpath, newpath)				\
+	os_file_rename_func(oldpath, newpath)
+
+# define os_file_delete(key, name)	os_file_delete_func(name)
+
+# define os_file_delete_if_exists(key, name)				\
+	os_file_delete_if_exists_func(name)
+
+#endif /* UNIV_PFS_IO */
+
+/** File types of a directory entry; see os_file_stat_t::type */
+
+enum os_file_type_t {
+	OS_FILE_TYPE_UNKNOWN = 0,
+	OS_FILE_TYPE_FILE,			/*!< regular file */
+	OS_FILE_TYPE_DIR,			/*!< directory */
+	OS_FILE_TYPE_LINK,			/*!< symbolic link */
+	OS_FILE_TYPE_BLOCK			/*!< block device */
+};
+
+/* Maximum path string length in bytes when referring to tables within the
+'./databasename/tablename.ibd' path format; we can allocate at least 2 buffers
+of this size from the thread stack; that is why this should not be made much
+bigger than 4000 bytes */
+#define OS_FILE_MAX_PATH	4000
+
+/** Struct used in fetching information of a file in a directory */
+struct os_file_stat_t {
+	char		name[OS_FILE_MAX_PATH];	/*!< path to a file */
+	os_file_type_t	type;			/*!< file type */
+	ib_int64_t	size;			/*!< file size */
+	time_t		ctime;			/*!< creation time */
+	time_t		mtime;			/*!< modification time */
+	time_t		atime;			/*!< access time */
+	bool		rw_perm;		/*!< true if can be opened
+						in read-write mode. Only valid
+						if type == OS_FILE_TYPE_FILE */
+};
+
+#ifdef __WIN__
+typedef HANDLE	os_file_dir_t;	/*!< directory stream */
+#else
+typedef DIR*	os_file_dir_t;	/*!< directory stream */
+#endif
+
+#ifdef __WIN__
+/***********************************************************************//**
+Gets the operating system version. Currently works only on Windows.
+@return	OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000, OS_WINXP, OS_WINVISTA,
+OS_WIN7. */
+UNIV_INTERN
+ulint
+os_get_os_version(void);
+/*===================*/
+#endif /* __WIN__ */
+#ifndef UNIV_HOTBACKUP
+/****************************************************************//**
+Creates the seek mutexes used in positioned reads and writes. */
+UNIV_INTERN
+void
+os_io_init_simple(void);
+/*===================*/
+/***********************************************************************//**
+Creates a temporary file.  This function is like tmpfile(3), but
+the temporary file is created in the MySQL temporary directory.
+@return	temporary file handle, or NULL on error */
+
+FILE*
+os_file_create_tmpfile(void);
+/*========================*/
+#endif /* !UNIV_HOTBACKUP */
+/***********************************************************************//**
+The os_file_opendir() function opens a directory stream corresponding to the
+directory named by the dirname argument. The directory stream is positioned
+at the first entry. In both Unix and Windows we automatically skip the '.'
+and '..' items at the start of the directory listing.
+@return	directory stream, NULL if error */
+UNIV_INTERN
+os_file_dir_t
+os_file_opendir(
+/*============*/
+	const char*	dirname,	/*!< in: directory name; it must not
+					contain a trailing '\' or '/' */
+	ibool		error_is_fatal);/*!< in: TRUE if we should treat an
+					error as a fatal error; if we try to
+					open symlinks then we do not wish a
+					fatal error if it happens not to be
+					a directory */
+/***********************************************************************//**
+Closes a directory stream.
+@return	0 if success, -1 if failure */
+UNIV_INTERN
+int
+os_file_closedir(
+/*=============*/
+	os_file_dir_t	dir);	/*!< in: directory stream */
+/***********************************************************************//**
+This function returns information of the next file in the directory. We jump
+over the '.' and '..' entries in the directory.
+@return	0 if ok, -1 if error, 1 if at the end of the directory */
+UNIV_INTERN
+int
+os_file_readdir_next_file(
+/*======================*/
+	const char*	dirname,/*!< in: directory name or path */
+	os_file_dir_t	dir,	/*!< in: directory stream */
+	os_file_stat_t*	info);	/*!< in/out: buffer where the info is returned */
+/*****************************************************************//**
+This function attempts to create a directory named pathname. The new directory
+gets default permissions. On Unix, the permissions are (0770 & ~umask). If the
+directory exists already, nothing is done and the call succeeds, unless the
+fail_if_exists argument is TRUE.
+@return	TRUE if call succeeds, FALSE on error */
+UNIV_INTERN
+ibool
+os_file_create_directory(
+/*=====================*/
+	const char*	pathname,	/*!< in: directory name as
+					null-terminated string */
+	ibool		fail_if_exists);/*!< in: if TRUE, pre-existing directory
+					is treated as an error. */
+/****************************************************************//**
+NOTE! Use the corresponding macro os_file_create_simple(), not directly
+this function!
+A simple function to open or create a file.
+@return own: handle to the file, not defined if error, error number
+can be retrieved with os_file_get_last_error */
+UNIV_INTERN
+os_file_t
+os_file_create_simple_func(
+/*=======================*/
+	const char*	name,	/*!< in: name of the file or path as a
+				null-terminated string */
+	ulint		create_mode,/*!< in: create mode */
+	ulint		access_type,/*!< in: OS_FILE_READ_ONLY or
+				OS_FILE_READ_WRITE */
+	ibool*		success);/*!< out: TRUE if succeed, FALSE if error */
+/****************************************************************//**
+NOTE! Use the corresponding macro
+os_file_create_simple_no_error_handling(), not directly this function!
+A simple function to open or create a file.
+@return own: handle to the file, not defined if error, error number
+can be retrieved with os_file_get_last_error */
+UNIV_INTERN
+os_file_t
+os_file_create_simple_no_error_handling_func(
+/*=========================================*/
+	const char*	name,	/*!< in: name of the file or path as a
+				null-terminated string */
+	ulint		create_mode,/*!< in: create mode */
+	ulint		access_type,/*!< in: OS_FILE_READ_ONLY,
+				OS_FILE_READ_WRITE, or
+				OS_FILE_READ_ALLOW_DELETE; the last option is
+				used by a backup program reading the file */
+	ibool*		success)/*!< out: TRUE if succeed, FALSE if error */
+	__attribute__((nonnull, warn_unused_result));
+/****************************************************************//**
+Tries to disable OS caching on an opened file descriptor. */
+UNIV_INTERN
+void
+os_file_set_nocache(
+/*================*/
+	int		fd,		/*!< in: file descriptor to alter */
+	const char*	file_name,	/*!< in: file name, used in the
+					diagnostic message */
+	const char*	operation_name);/*!< in: "open" or "create"; used in the
+					diagnostic message */
+/****************************************************************//**
+NOTE! Use the corresponding macro os_file_create(), not directly
+this function!
+Opens an existing file or creates a new.
+@return own: handle to the file, not defined if error, error number
+can be retrieved with os_file_get_last_error */
+UNIV_INTERN
+os_file_t
+os_file_create_func(
+/*================*/
+	const char*	name,	/*!< in: name of the file or path as a
+				null-terminated string */
+	ulint		create_mode,/*!< in: create mode */
+	ulint		purpose,/*!< in: OS_FILE_AIO, if asynchronous,
+				non-buffered i/o is desired,
+				OS_FILE_NORMAL, if any normal file;
+				NOTE that it also depends on type, os_aio_..
+				and srv_.. variables whether we really use
+				async i/o or unbuffered i/o: look in the
+				function source code for the exact rules */
+	ulint		type,	/*!< in: OS_DATA_FILE or OS_LOG_FILE */
+	ibool*		success)/*!< out: TRUE if succeed, FALSE if error */
+	__attribute__((nonnull, warn_unused_result));
+/***********************************************************************//**
+Deletes a file. The file has to be closed before calling this.
+@return	true if success */
+UNIV_INTERN
+bool
+os_file_delete_func(
+/*================*/
+	const char*	name);	/*!< in: file path as a null-terminated
+				string */
+
+/***********************************************************************//**
+Deletes a file if it exists. The file has to be closed before calling this.
+@return	true if success */
+UNIV_INTERN
+bool
+os_file_delete_if_exists_func(
+/*==========================*/
+	const char*	name);	/*!< in: file path as a null-terminated
+				string */
+/***********************************************************************//**
+NOTE! Use the corresponding macro os_file_rename(), not directly
+this function!
+Renames a file (can also move it to another directory). It is safest that the
+file is closed before calling this function.
+@return	TRUE if success */
+UNIV_INTERN
+ibool
+os_file_rename_func(
+/*================*/
+	const char*	oldpath,	/*!< in: old file path as a
+					null-terminated string */
+	const char*	newpath);	/*!< in: new file path */
+/***********************************************************************//**
+NOTE! Use the corresponding macro os_file_close(), not directly this
+function!
+Closes a file handle. In case of error, error number can be retrieved with
+os_file_get_last_error.
+@return	TRUE if success */
+UNIV_INTERN
+ibool
+os_file_close_func(
+/*===============*/
+	os_file_t	file);	/*!< in, own: handle to a file */
+
+#ifdef UNIV_PFS_IO
+/****************************************************************//**
+NOTE! Please use the corresponding macro os_file_create_simple(),
+not directly this function!
+A performance schema instrumented wrapper function for
+os_file_create_simple() which opens or creates a file.
+@return own: handle to the file, not defined if error, error number
+can be retrieved with os_file_get_last_error */
+UNIV_INLINE
+os_file_t
+pfs_os_file_create_simple_func(
+/*===========================*/
+	mysql_pfs_key_t key,	/*!< in: Performance Schema Key */
+	const char*	name,	/*!< in: name of the file or path as a
+				null-terminated string */
+	ulint		create_mode,/*!< in: create mode */
+	ulint		access_type,/*!< in: OS_FILE_READ_ONLY or
+				OS_FILE_READ_WRITE */
+	ibool*		success,/*!< out: TRUE if succeed, FALSE if error */
+	const char*	src_file,/*!< in: file name where func invoked */
+	ulint		src_line)/*!< in: line where the func invoked */
+	__attribute__((nonnull, warn_unused_result));
+
+/****************************************************************//**
+NOTE! Please use the corresponding macro
+os_file_create_simple_no_error_handling(), not directly this function!
+A performance schema instrumented wrapper function for
+os_file_create_simple_no_error_handling(). Add instrumentation to
+monitor file creation/open.
+@return own: handle to the file, not defined if error, error number
+can be retrieved with os_file_get_last_error */
+UNIV_INLINE
+os_file_t
+pfs_os_file_create_simple_no_error_handling_func(
+/*=============================================*/
+	mysql_pfs_key_t key,	/*!< in: Performance Schema Key */
+	const char*	name,	/*!< in: name of the file or path as a
+				null-terminated string */
+	ulint		create_mode, /*!< in: file create mode */
+	ulint		access_type,/*!< in: OS_FILE_READ_ONLY,
+				OS_FILE_READ_WRITE, or
+				OS_FILE_READ_ALLOW_DELETE; the last option is
+				used by a backup program reading the file */
+	ibool*		success,/*!< out: TRUE if succeed, FALSE if error */
+	const char*	src_file,/*!< in: file name where func invoked */
+	ulint		src_line)/*!< in: line where the func invoked */
+	__attribute__((nonnull, warn_unused_result));
+
+/****************************************************************//**
+NOTE! Please use the corresponding macro os_file_create(), not directly
+this function!
+A performance schema wrapper function for os_file_create().
+Add instrumentation to monitor file creation/open.
+@return own: handle to the file, not defined if error, error number
+can be retrieved with os_file_get_last_error */
+UNIV_INLINE
+os_file_t
+pfs_os_file_create_func(
+/*====================*/
+	mysql_pfs_key_t key,	/*!< in: Performance Schema Key */
+	const char*	name,	/*!< in: name of the file or path as a
+				null-terminated string */
+	ulint		create_mode,/*!< in: file create mode */
+	ulint		purpose,/*!< in: OS_FILE_AIO, if asynchronous,
+				non-buffered i/o is desired,
+				OS_FILE_NORMAL, if any normal file;
+				NOTE that it also depends on type, os_aio_..
+				and srv_.. variables whether we really use
+				async i/o or unbuffered i/o: look in the
+				function source code for the exact rules */
+	ulint		type,	/*!< in: OS_DATA_FILE or OS_LOG_FILE */
+	ibool*		success,/*!< out: TRUE if succeed, FALSE if error */
+	const char*	src_file,/*!< in: file name where func invoked */
+	ulint		src_line)/*!< in: line where the func invoked */
+	__attribute__((nonnull, warn_unused_result));
+
+/***********************************************************************//**
+NOTE! Please use the corresponding macro os_file_close(), not directly
+this function!
+A performance schema instrumented wrapper function for os_file_close().
+@return TRUE if success */
+UNIV_INLINE
+ibool
+pfs_os_file_close_func(
+/*===================*/
+        os_file_t	file,	/*!< in, own: handle to a file */
+	const char*	src_file,/*!< in: file name where func invoked */
+	ulint		src_line);/*!< in: line where the func invoked */
+/*******************************************************************//**
+NOTE! Please use the corresponding macro os_file_read(), not directly
+this function!
+This is the performance schema instrumented wrapper function for
+os_file_read() which requests a synchronous read operation.
+@return	TRUE if request was successful, FALSE if fail */
+UNIV_INLINE
+ibool
+pfs_os_file_read_func(
+/*==================*/
+	os_file_t	file,	/*!< in: handle to a file */
+	void*		buf,	/*!< out: buffer where to read */
+	os_offset_t	offset,	/*!< in: file offset where to read */
+	ulint		n,	/*!< in: number of bytes to read */
+	const char*	src_file,/*!< in: file name where func invoked */
+	ulint		src_line);/*!< in: line where the func invoked */
+
+/*******************************************************************//**
+NOTE! Please use the corresponding macro os_file_read_no_error_handling(),
+not directly this function!
+This is the performance schema instrumented wrapper function for
+os_file_read_no_error_handling_func() which requests a synchronous
+read operation.
+@return	TRUE if request was successful, FALSE if fail */
+UNIV_INLINE
+ibool
+pfs_os_file_read_no_error_handling_func(
+/*====================================*/
+	os_file_t	file,	/*!< in: handle to a file */
+	void*		buf,	/*!< out: buffer where to read */
+	os_offset_t	offset,	/*!< in: file offset where to read */
+	ulint		n,	/*!< in: number of bytes to read */
+	const char*	src_file,/*!< in: file name where func invoked */
+	ulint		src_line);/*!< in: line where the func invoked */
+
+/*******************************************************************//**
+NOTE! Please use the corresponding macro os_aio(), not directly this
+function!
+Performance schema wrapper function of os_aio() which requests
+an asynchronous i/o operation.
+@return TRUE if request was queued successfully, FALSE if fail */
+UNIV_INLINE
+ibool
+pfs_os_aio_func(
+/*============*/
+	ulint		type,	/*!< in: OS_FILE_READ or OS_FILE_WRITE */
+	ulint		mode,	/*!< in: OS_AIO_NORMAL etc. I/O mode */
+	const char*	name,	/*!< in: name of the file or path as a
+				null-terminated string */
+	os_file_t	file,	/*!< in: handle to a file */
+	void*		buf,	/*!< in: buffer where to read or from which
+				to write */
+	os_offset_t	offset,	/*!< in: file offset where to read or write */
+	ulint		n,	/*!< in: number of bytes to read or write */
+	fil_node_t*	message1,/*!< in: message for the aio handler
+				(can be used to identify a completed
+				aio operation); ignored if mode is
+				OS_AIO_SYNC */
+	void*		message2,/*!< in: message for the aio handler
+				(can be used to identify a completed
+				aio operation); ignored if mode is
+                                OS_AIO_SYNC */
+	const char*	src_file,/*!< in: file name where func invoked */
+	ulint		src_line);/*!< in: line where the func invoked */
+/*******************************************************************//**
+NOTE! Please use the corresponding macro os_file_write(), not directly
+this function!
+This is the performance schema instrumented wrapper function for
+os_file_write() which requests a synchronous write operation.
+@return	TRUE if request was successful, FALSE if fail */
+UNIV_INLINE
+ibool
+pfs_os_file_write_func(
+/*===================*/
+	const char*	name,	/*!< in: name of the file or path as a
+				null-terminated string */
+	os_file_t	file,	/*!< in: handle to a file */
+	const void*	buf,	/*!< in: buffer from which to write */
+	os_offset_t	offset,	/*!< in: file offset where to write */
+	ulint		n,	/*!< in: number of bytes to write */
+	const char*	src_file,/*!< in: file name where func invoked */
+	ulint		src_line);/*!< in: line where the func invoked */
+/***********************************************************************//**
+NOTE! Please use the corresponding macro os_file_flush(), not directly
+this function!
+This is the performance schema instrumented wrapper function for
+os_file_flush(), which flushes the write buffers of a given file
+to the disk.
+@return TRUE if success */
+UNIV_INLINE
+ibool
+pfs_os_file_flush_func(
+/*===================*/
+	os_file_t	file,	/*!< in, own: handle to a file */
+	const char*	src_file,/*!< in: file name where func invoked */
+	ulint		src_line);/*!< in: line where the func invoked */
+
+/***********************************************************************//**
+NOTE! Please use the corresponding macro os_file_rename(), not directly
+this function!
+This is the performance schema instrumented wrapper function for
+os_file_rename()
+@return TRUE if success */
+UNIV_INLINE
+ibool
+pfs_os_file_rename_func(
+/*====================*/
+	mysql_pfs_key_t	key,	/*!< in: Performance Schema Key */
+	const char*	oldpath,/*!< in: old file path as a null-terminated
+				string */
+	const char*	newpath,/*!< in: new file path */
+	const char*	src_file,/*!< in: file name where func invoked */
+	ulint		src_line);/*!< in: line where the func invoked */
+
+/***********************************************************************//**
+NOTE! Please use the corresponding macro os_file_delete(), not directly
+this function!
+This is the performance schema instrumented wrapper function for
+os_file_delete().
+@return true if success */
+UNIV_INLINE
+bool
+pfs_os_file_delete_func(
+/*====================*/
+	mysql_pfs_key_t	key,	/*!< in: Performance Schema Key */
+	const char*	name,	/*!< in: file path as a null-terminated
+				string */
+	const char*	src_file,/*!< in: file name where func invoked */
+	ulint		src_line);/*!< in: line where the func invoked */
+
+/***********************************************************************//**
+NOTE! Please use the corresponding macro os_file_delete_if_exists(), not
+directly this function!
+This is the performance schema instrumented wrapper function for
+os_file_delete_if_exists().
+@return true if success */
+UNIV_INLINE
+bool
+pfs_os_file_delete_if_exists_func(
+/*==============================*/
+	mysql_pfs_key_t	key,	/*!< in: Performance Schema Key */
+	const char*	name,	/*!< in: file path as a null-terminated
+				string */
+	const char*	src_file,/*!< in: file name where func invoked */
+	ulint		src_line);/*!< in: line where the func invoked */
+#endif	/* UNIV_PFS_IO */
+
+#ifdef UNIV_HOTBACKUP
+/***********************************************************************//**
+Closes a file handle.
+@return	TRUE if success */
+UNIV_INTERN
+ibool
+os_file_close_no_error_handling(
+/*============================*/
+	os_file_t	file);	/*!< in, own: handle to a file */
+#endif /* UNIV_HOTBACKUP */
+/***********************************************************************//**
+Gets a file size.
+@return	file size, or (os_offset_t) -1 on failure */
+UNIV_INTERN
+os_offset_t
+os_file_get_size(
+/*=============*/
+	os_file_t	file)	/*!< in: handle to a file */
+	__attribute__((warn_unused_result));
+/***********************************************************************//**
+Write the specified number of zeros to a newly created file.
+@return	TRUE if success */
+UNIV_INTERN
+ibool
+os_file_set_size(
+/*=============*/
+	const char*	name,	/*!< in: name of the file or path as a
+				null-terminated string */
+	os_file_t	file,	/*!< in: handle to a file */
+	os_offset_t	size)	/*!< in: file size */
+	__attribute__((nonnull, warn_unused_result));
+/***********************************************************************//**
+Truncates a file at its current position.
+@return	TRUE if success */
+UNIV_INTERN
+ibool
+os_file_set_eof(
+/*============*/
+	FILE*		file);	/*!< in: file to be truncated */
+/***********************************************************************//**
+NOTE! Use the corresponding macro os_file_flush(), not directly this function!
+Flushes the write buffers of a given file to the disk.
+@return	TRUE if success */
+UNIV_INTERN
+ibool
+os_file_flush_func(
+/*===============*/
+	os_file_t	file);	/*!< in, own: handle to a file */
+/***********************************************************************//**
+Retrieves the last error number if an error occurs in a file io function.
+The number should be retrieved before any other OS calls (because they may
+overwrite the error number). If the number is not known to this program,
+the OS error number + 100 is returned.
+@return	error number, or OS error number + 100 */
+UNIV_INTERN
+ulint
+os_file_get_last_error(
+/*===================*/
+	bool	report_all_errors);	/*!< in: true if we want an error message
+					printed for all errors */
+/*******************************************************************//**
+NOTE! Use the corresponding macro os_file_read(), not directly this function!
+Requests a synchronous read operation.
+@return	TRUE if request was successful, FALSE if fail */
+UNIV_INTERN
+ibool
+os_file_read_func(
+/*==============*/
+	os_file_t	file,	/*!< in: handle to a file */
+	void*		buf,	/*!< out: buffer where to read */
+	os_offset_t	offset,	/*!< in: file offset where to read */
+	ulint		n);	/*!< in: number of bytes to read */
+/*******************************************************************//**
+Rewinds file to its start, reads at most size - 1 bytes from it to str, and
+NUL-terminates str. All errors are silently ignored. This function is
+mostly meant to be used with temporary files. */
+UNIV_INTERN
+void
+os_file_read_string(
+/*================*/
+	FILE*	file,	/*!< in: file to read from */
+	char*	str,	/*!< out: buffer where to read */
+	ulint	size);	/*!< in: size of buffer */
+/*******************************************************************//**
+NOTE! Use the corresponding macro os_file_read_no_error_handling(),
+not directly this function!
+Requests a synchronous positioned read operation. This function does not do
+any error handling. In case of error it returns FALSE.
+@return	TRUE if request was successful, FALSE if fail */
+UNIV_INTERN
+ibool
+os_file_read_no_error_handling_func(
+/*================================*/
+	os_file_t	file,	/*!< in: handle to a file */
+	void*		buf,	/*!< out: buffer where to read */
+	os_offset_t	offset,	/*!< in: file offset where to read */
+	ulint		n);	/*!< in: number of bytes to read */
+
+/*******************************************************************//**
+NOTE! Use the corresponding macro os_file_write(), not directly this
+function!
+Requests a synchronous write operation.
+@return	TRUE if request was successful, FALSE if fail */
+UNIV_INTERN
+ibool
+os_file_write_func(
+/*===============*/
+	const char*	name,	/*!< in: name of the file or path as a
+				null-terminated string */
+	os_file_t	file,	/*!< in: handle to a file */
+	const void*	buf,	/*!< in: buffer from which to write */
+	os_offset_t	offset,	/*!< in: file offset where to write */
+	ulint		n);	/*!< in: number of bytes to write */
+/*******************************************************************//**
+Check the existence and type of the given file.
+@return	TRUE if call succeeded */
+UNIV_INTERN
+ibool
+os_file_status(
+/*===========*/
+	const char*	path,	/*!< in: pathname of the file */
+	ibool*		exists,	/*!< out: TRUE if file exists */
+	os_file_type_t* type);	/*!< out: type of the file (if it exists) */
+/****************************************************************//**
+The function os_file_dirname returns a directory component of a
+null-terminated pathname string.  In the usual case, dirname returns
+the string up to, but not including, the final '/', and basename
+is the component following the final '/'.  Trailing '/' characters
+are not counted as part of the pathname.
+
+If path does not contain a slash, dirname returns the string ".".
+
+Concatenating the string returned by dirname, a "/", and the basename
+yields a complete pathname.
+
+The return value is a copy of the directory component of the pathname.
+The copy is allocated from heap. It is the caller's responsibility
+to free it after it is no longer needed.
+
+The following list of examples (taken from SUSv2) shows the strings
+returned by dirname and basename for different paths:
+
+       path	      dirname	     basename
+       "/usr/lib"     "/usr"	     "lib"
+       "/usr/"	      "/"	     "usr"
+       "usr"	      "."	     "usr"
+       "/"	      "/"	     "/"
+       "."	      "."	     "."
+       ".."	      "."	     ".."
+
+@return	own: directory component of the pathname */
+UNIV_INTERN
+char*
+os_file_dirname(
+/*============*/
+	const char*	path);	/*!< in: pathname */
+/****************************************************************//**
+This function returns a new path name after replacing the basename
+in an old path with a new basename.  The old_path is a full path
+name including the extension.  The new_name is in the normal
+form "databasename/tablename".  The new base name is found after
+the forward slash.  Both input strings are null terminated.
+
+This function allocates memory to be returned.  It is the caller's
+responsibility to free the return value after it is no longer needed.
+
+@return	own: new full pathname */
+UNIV_INTERN
+char*
+os_file_make_new_pathname(
+/*======================*/
+	const char*	old_path,	/*!< in: pathname */
+	const char*	new_name);	/*!< in: new file name */
+/****************************************************************//**
+This function returns a remote path name by combining a data directory
+path provided in a DATA DIRECTORY clause with the tablename which is
+in the form 'database/tablename'.  It strips the file basename (which
+is the tablename) found after the last directory in the path provided.
+The full filepath created will include the database name as a directory
+under the path provided.  The filename is the tablename with the given
+extension (e.g. '.ibd'). All input and output strings are null-terminated.
+
+This function allocates memory to be returned.  It is the caller's
+responsibility to free the return value after it is no longer needed.
+
+@return	own: A full pathname; data_dir_path/databasename/tablename.ibd */
+UNIV_INTERN
+char*
+os_file_make_remote_pathname(
+/*=========================*/
+	const char*	data_dir_path,	/*!< in: pathname */
+	const char*	tablename,	/*!< in: tablename */
+	const char*	extention);	/*!< in: file extension; ibd, cfg */
+/****************************************************************//**
+This function reduces a null-terminated full remote path name into
+the path that is sent by MySQL for DATA DIRECTORY clause.  It replaces
+the 'databasename/tablename.ibd' found at the end of the path with just
+'tablename'.
+
+Since the result is always smaller than the path sent in, no new memory
+is allocated. The caller should allocate memory for the path sent in.
+This function manipulates that path in place.
+
+If the path format is not as expected, just return.  The result is used
+to inform a SHOW CREATE TABLE command. */
+UNIV_INTERN
+void
+os_file_make_data_dir_path(
+/*========================*/
+	char*	data_dir_path);	/*!< in/out: full path/data_dir_path */
+/****************************************************************//**
+Creates all missing subdirectories along the given path.
+@return	TRUE if call succeeded, FALSE otherwise */
+UNIV_INTERN
+ibool
+os_file_create_subdirs_if_needed(
+/*=============================*/
+	const char*	path);	/*!< in: path name */
+/*******************************************************************//**
+Initializes the asynchronous io system. Creates one array each for ibuf
+and log i/o. Also creates one array each for read and write where each
+array is divided logically into n_read_segs and n_write_segs
+respectively. The caller must create an i/o handler thread for each
+segment in these arrays. This function also creates the sync array.
+No i/o handler thread needs to be created for that. */
+UNIV_INTERN
+ibool
+os_aio_init(
+/*========*/
+	ulint	n_per_seg,	/*!< in: maximum number of pending aio
+				operations allowed per segment */
+	ulint	n_read_segs,	/*!< in: number of reader threads */
+	ulint	n_write_segs,	/*!< in: number of writer threads */
+	ulint	n_slots_sync);	/*!< in: number of slots in the sync aio
+				array */
+/***********************************************************************
+Frees the asynchronous io system. */
+UNIV_INTERN
+void
+os_aio_free(void);
+/*=============*/
+
+/*******************************************************************//**
+NOTE! Use the corresponding macro os_aio(), not directly this function!
+Requests an asynchronous i/o operation.
+@return	TRUE if request was queued successfully, FALSE if fail */
+UNIV_INTERN
+ibool
+os_aio_func(
+/*========*/
+	ulint		type,	/*!< in: OS_FILE_READ or OS_FILE_WRITE */
+	ulint		mode,	/*!< in: OS_AIO_NORMAL, ..., possibly ORed
+				to OS_AIO_SIMULATED_WAKE_LATER: the
+				last flag advises this function not to wake
+				i/o-handler threads, but the caller will
+				do the waking explicitly later, in this
+				way the caller can post several requests in
+				a batch; NOTE that the batch must not be
+				so big that it exhausts the slots in aio
+				arrays! NOTE that a simulated batch
+				may introduce hidden chances of deadlocks,
+				because i/os are not actually handled until
+				all have been posted: use with great
+				caution! */
+	const char*	name,	/*!< in: name of the file or path as a
+				null-terminated string */
+	os_file_t	file,	/*!< in: handle to a file */
+	void*		buf,	/*!< in: buffer where to read or from which
+				to write */
+	os_offset_t	offset,	/*!< in: file offset where to read or write */
+	ulint		n,	/*!< in: number of bytes to read or write */
+	fil_node_t*	message1,/*!< in: message for the aio handler
+				(can be used to identify a completed
+				aio operation); ignored if mode is
+				OS_AIO_SYNC */
+	void*		message2);/*!< in: message for the aio handler
+				(can be used to identify a completed
+				aio operation); ignored if mode is
+				OS_AIO_SYNC */
+/************************************************************************//**
+Wakes up all async i/o threads so that they know to exit themselves in
+shutdown. */
+UNIV_INTERN
+void
+os_aio_wake_all_threads_at_shutdown(void);
+/*=====================================*/
+/************************************************************************//**
+Waits until there are no pending writes in os_aio_write_array. There can
+be other, synchronous, pending writes. */
+UNIV_INTERN
+void
+os_aio_wait_until_no_pending_writes(void);
+/*=====================================*/
+/**********************************************************************//**
+Wakes up simulated aio i/o-handler threads if they have something to do. */
+UNIV_INTERN
+void
+os_aio_simulated_wake_handler_threads(void);
+/*=======================================*/
+/**********************************************************************//**
+This function can be called if one wants to post a batch of reads and
+prefers an i/o-handler thread to handle them all at once later. You must
+call os_aio_simulated_wake_handler_threads later to ensure the threads
+are not left sleeping! */
+UNIV_INTERN
+void
+os_aio_simulated_put_read_threads_to_sleep(void);
+/*============================================*/
+
+#ifdef WIN_ASYNC_IO
+/**********************************************************************//**
+This function is only used in Windows asynchronous i/o.
+Waits for an aio operation to complete. This function is used to wait
+for completed requests. The aio array of pending requests is divided
+into segments. The thread specifies which segment or slot it wants to wait
+for. NOTE: this function will also take care of freeing the aio slot,
+therefore no other thread is allowed to do the freeing!
+@return	TRUE if the aio operation succeeded */
+UNIV_INTERN
+ibool
+os_aio_windows_handle(
+/*==================*/
+	ulint	segment,	/*!< in: the number of the segment in the aio
+				arrays to wait for; segment 0 is the ibuf
+				i/o thread, segment 1 the log i/o thread,
+				then follow the non-ibuf read threads, and as
+				the last are the non-ibuf write threads; if
+				this is ULINT_UNDEFINED, then it means that
+				sync aio is used, and this parameter is
+				ignored */
+	ulint	pos,		/*!< in: used only in sync aio:
+				wait for the aio slot at this position */
+	fil_node_t**message1,	/*!< out: the messages passed with the aio
+				request; note that also in the case where
+				the aio operation failed, these output
+				parameters are valid and can be used to
+				restart the operation, for example */
+	void**	message2,	/*!< out: second message; see message1 */
+	ulint*	type);		/*!< out: OS_FILE_WRITE or ..._READ */
+#endif
+
+/**********************************************************************//**
+Does simulated aio. This function should be called by an i/o-handler
+thread.
+@return	TRUE if the aio operation succeeded */
+UNIV_INTERN
+ibool
+os_aio_simulated_handle(
+/*====================*/
+	ulint	segment,	/*!< in: the number of the segment in the aio
+				arrays to wait for; segment 0 is the ibuf
+				i/o thread, segment 1 the log i/o thread,
+				then follow the non-ibuf read threads, and as
+				the last are the non-ibuf write threads */
+	fil_node_t**message1,	/*!< out: the messages passed with the aio
+				request; note that also in the case where
+				the aio operation failed, these output
+				parameters are valid and can be used to
+				restart the operation, for example */
+	void**	message2,	/*!< out: second message; see message1 */
+	ulint*	type);		/*!< out: OS_FILE_WRITE or ..._READ */
+/**********************************************************************//**
+Validates the consistency of the aio system.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+os_aio_validate(void);
+/*=================*/
+/**********************************************************************//**
+Prints info of the aio arrays. */
+UNIV_INTERN
+void
+os_aio_print(
+/*=========*/
+	FILE*	file);	/*!< in: file where to print */
+/**********************************************************************//**
+Refreshes the statistics used to print per-second averages. */
+UNIV_INTERN
+void
+os_aio_refresh_stats(void);
+/*======================*/
+
+#ifdef UNIV_DEBUG
+/**********************************************************************//**
+Checks that all slots in the system have been freed, that is, there are
+no pending io operations. */
+UNIV_INTERN
+ibool
+os_aio_all_slots_free(void);
+/*=======================*/
+#endif /* UNIV_DEBUG */
+
+/*******************************************************************//**
+This function returns information about the specified file
+@return	DB_SUCCESS if all OK */
+UNIV_INTERN
+dberr_t
+os_file_get_status(
+/*===============*/
+	const char*	path,		/*!< in: pathname of the file */
+	os_file_stat_t* stat_info,	/*!< information of a file in a
+					directory */
+	bool		check_rw_perm);	/*!< in: for testing whether the
+					file can be opened in RW mode */
+
+#if !defined(UNIV_HOTBACKUP)
+/*********************************************************************//**
+Creates a temporary file that will be deleted on close.
+This function is defined in ha_innodb.cc.
+@return	temporary file descriptor, or < 0 on error */
+UNIV_INTERN
+int
+innobase_mysql_tmpfile(void);
+/*========================*/
+#endif /* !UNIV_HOTBACKUP */
+
+
+#if defined(LINUX_NATIVE_AIO)
+/**********************************************************************//**
+This function is only used in Linux native asynchronous i/o.
+Waits for an aio operation to complete. This function is used to wait
+for completed requests. The aio array of pending requests is divided
+into segments. The thread specifies which segment or slot it wants to wait
+for. NOTE: this function will also take care of freeing the aio slot,
+therefore no other thread is allowed to do the freeing!
+@return	TRUE if the IO was successful */
+UNIV_INTERN
+ibool
+os_aio_linux_handle(
+/*================*/
+	ulint	global_seg,	/*!< in: segment number in the aio array
+				to wait for; segment 0 is the ibuf
+				i/o thread, segment 1 is log i/o thread,
+				then follow the non-ibuf read threads,
+				and the last are the non-ibuf write
+				threads. */
+	fil_node_t**message1,	/*!< out: first message of the aio request */
+	void**	message2,	/*!< out: second message of the aio request;
+				even if the aio operation failed, both output
+				parameters are valid and can be used to
+				restart the operation. */
+	ulint*	type);		/*!< out: OS_FILE_WRITE or ..._READ */
+#endif /* LINUX_NATIVE_AIO */
+
+#ifndef UNIV_NONINL
+#include "os0file.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/os0file.ic b/storage/innobase/include/os0file.ic
new file mode 100644
index 00000000000..defd8204ba3
--- /dev/null
+++ b/storage/innobase/include/os0file.ic
@@ -0,0 +1,449 @@
+/*****************************************************************************
+
+Copyright (c) 2010, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/os0file.ic
+The interface to the operating system file io
+
+Created 2/20/2010 Jimmy Yang
+*******************************************************/
+
+#include "univ.i"
+
+#ifdef UNIV_PFS_IO
+/****************************************************************//**
+NOTE! Please use the corresponding macro os_file_create_simple(),
+not directly this function!
+A performance schema instrumented wrapper function for
+os_file_create_simple() which opens or creates a file.
+@return own: handle to the file, not defined if error, error number
+can be retrieved with os_file_get_last_error */
+UNIV_INLINE
+os_file_t
+pfs_os_file_create_simple_func(
+/*===========================*/
+	mysql_pfs_key_t key,	/*!< in: Performance Schema Key */
+	const char*	name,	/*!< in: name of the file or path as a
+				null-terminated string */
+	ulint		create_mode,/*!< in: create mode */
+	ulint		access_type,/*!< in: OS_FILE_READ_ONLY or
+				OS_FILE_READ_WRITE */
+	ibool*		success,/*!< out: TRUE if succeed, FALSE if error */
+	const char*	src_file,/*!< in: file name where func invoked */
+	ulint		src_line)/*!< in: line where the func invoked */
+{
+	os_file_t	file;
+	struct PSI_file_locker* locker = NULL;
+	PSI_file_locker_state	state;
+
+	/* OS_FILE_CREATE is reported as a create, any other mode as an open */
+	register_pfs_file_open_begin(&state, locker, key,
+				     ((create_mode == OS_FILE_CREATE)
+					? PSI_FILE_CREATE
+					: PSI_FILE_OPEN),
+				     name, src_file, src_line);
+
+	file = os_file_create_simple_func(name, create_mode,
+					  access_type, success);
+
+	/* Register the returned "file" value with the system */
+	register_pfs_file_open_end(locker, file);
+
+	return(file);
+}
+
+/****************************************************************//**
+NOTE! Please use the corresponding macro
+os_file_create_simple_no_error_handling(), not directly this function!
+A performance schema instrumented wrapper function for
+os_file_create_simple_no_error_handling(). Add instrumentation to
+monitor file creation/open.
+@return own: handle to the file, not defined if error, error number
+can be retrieved with os_file_get_last_error */
+UNIV_INLINE
+os_file_t
+pfs_os_file_create_simple_no_error_handling_func(
+/*=============================================*/
+	mysql_pfs_key_t key,	/*!< in: Performance Schema Key */
+	const char*	name,	/*!< in: name of the file or path as a
+				null-terminated string */
+	ulint		create_mode, /*!< in: file create mode */
+	ulint		access_type,/*!< in: OS_FILE_READ_ONLY,
+				OS_FILE_READ_WRITE, or
+				OS_FILE_READ_ALLOW_DELETE; the last option is
+				used by a backup program reading the file */
+	ibool*		success,/*!< out: TRUE if succeed, FALSE if error */
+	const char*	src_file,/*!< in: file name where func invoked */
+	ulint		src_line)/*!< in: line where the func invoked */
+{
+	os_file_t	file;
+	struct PSI_file_locker* locker = NULL;
+	PSI_file_locker_state	state;
+
+	/* OS_FILE_CREATE is reported as a create, any other mode as an open */
+	register_pfs_file_open_begin(&state, locker, key,
+				     ((create_mode == OS_FILE_CREATE)
+					? PSI_FILE_CREATE
+					: PSI_FILE_OPEN),
+				     name, src_file, src_line);
+
+	file = os_file_create_simple_no_error_handling_func(
+		name, create_mode, access_type, success);
+	/* Register the returned "file" value with the system */
+	register_pfs_file_open_end(locker, file);
+
+	return(file);
+}
+
+/****************************************************************//**
+NOTE! Please use the corresponding macro os_file_create(), not directly
+this function!
+A performance schema wrapper function for os_file_create().
+Add instrumentation to monitor file creation/open.
+@return own: handle to the file, not defined if error, error number
+can be retrieved with os_file_get_last_error */
+UNIV_INLINE
+os_file_t
+pfs_os_file_create_func(
+/*====================*/
+	mysql_pfs_key_t key,	/*!< in: Performance Schema Key */
+	const char*	name,	/*!< in: name of the file or path as a
+				null-terminated string */
+	ulint		create_mode,/*!< in: file create mode */
+	ulint		purpose,/*!< in: OS_FILE_AIO, if asynchronous,
+				non-buffered i/o is desired,
+				OS_FILE_NORMAL, if any normal file;
+				NOTE that it also depends on type, os_aio_..
+				and srv_.. variables whether we really use
+				async i/o or unbuffered i/o: look in the
+				function source code for the exact rules */
+	ulint		type,	/*!< in: OS_DATA_FILE or OS_LOG_FILE */
+	ibool*		success,/*!< out: TRUE if succeed, FALSE if error */
+	const char*	src_file,/*!< in: file name where func invoked */
+	ulint		src_line)/*!< in: line where the func invoked */
+{
+	os_file_t	file;
+	struct PSI_file_locker* locker = NULL;
+	PSI_file_locker_state	state;
+
+	/* OS_FILE_CREATE is reported as a create, any other mode as an open */
+	register_pfs_file_open_begin(&state, locker, key,
+				     ((create_mode == OS_FILE_CREATE)
+					? PSI_FILE_CREATE
+					: PSI_FILE_OPEN),
+				     name, src_file, src_line);
+
+	file = os_file_create_func(name, create_mode, purpose, type, success);
+	/* Register the returned "file" value with the system */
+	register_pfs_file_open_end(locker, file);
+
+	return(file);
+}
+
+/***********************************************************************//**
+NOTE! Please use the corresponding macro os_file_close(), not directly
+this function!
+A performance schema instrumented wrapper function for os_file_close().
+@return TRUE if success */
+UNIV_INLINE
+ibool
+pfs_os_file_close_func(
+/*===================*/
+        os_file_t	file,	/*!< in, own: handle to a file */
+	const char*	src_file,/*!< in: file name where func invoked */
+	ulint		src_line)/*!< in: line where the func invoked */
+{
+	ibool	result;
+	struct PSI_file_locker*	locker = NULL;
+	PSI_file_locker_state	state;
+
+	/* register the file close; 0 is passed as the byte count */
+	register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_CLOSE,
+				   src_file, src_line);
+
+	result = os_file_close_func(file);
+	/* the end hook is called whether or not the close succeeded */
+	register_pfs_file_io_end(locker, 0);
+
+	return(result);
+}
+
+/*******************************************************************//**
+NOTE! Please use the corresponding macro os_aio(), not directly this
+function!
+Performance schema instrumented wrapper function of os_aio() which
+requests an asynchronous i/o operation.
+@return TRUE if request was queued successfully, FALSE if fail */
+UNIV_INLINE
+ibool
+pfs_os_aio_func(
+/*============*/
+	ulint		type,	/*!< in: OS_FILE_READ or OS_FILE_WRITE */
+	ulint		mode,	/*!< in: OS_AIO_NORMAL etc. I/O mode */
+	const char*	name,	/*!< in: name of the file or path as a
+				null-terminated string */
+	os_file_t	file,	/*!< in: handle to a file */
+	void*		buf,	/*!< in: buffer where to read or from which
+				to write */
+	os_offset_t	offset,	/*!< in: file offset where to read or write */
+	ulint		n,	/*!< in: number of bytes to read or write */
+	fil_node_t*	message1,/*!< in: message for the aio handler
+				(can be used to identify a completed
+				aio operation); ignored if mode is
+				OS_AIO_SYNC */
+	void*		message2,/*!< in: message for the aio handler
+				(can be used to identify a completed
+				aio operation); ignored if mode is
+				OS_AIO_SYNC */
+	const char*	src_file,/*!< in: file name where func invoked */
+	ulint		src_line)/*!< in: line where the func invoked */
+{
+	ibool	result;
+	struct PSI_file_locker*	locker = NULL;
+	PSI_file_locker_state	state;
+
+	/* OS_FILE_WRITE is reported as a write, any other type as a read */
+	register_pfs_file_io_begin(&state, locker, file, n,
+				   (type == OS_FILE_WRITE)
+					? PSI_FILE_WRITE
+					: PSI_FILE_READ,
+				   src_file, src_line);
+
+	result = os_aio_func(type, mode, name, file, buf, offset,
+			     n, message1, message2);
+	/* ends the event when os_aio_func() returns; n is reported as-is */
+	register_pfs_file_io_end(locker, n);
+
+	return(result);
+}
+
+/*******************************************************************//**
+NOTE! Please use the corresponding macro os_file_read(), not directly
+this function!
+This is the performance schema instrumented wrapper function for
+os_file_read() which requests a synchronous read operation.
+@return TRUE if request was successful, FALSE if fail */
+UNIV_INLINE
+ibool
+pfs_os_file_read_func(
+/*==================*/
+	os_file_t	file,	/*!< in: handle to a file */
+	void*		buf,	/*!< in: buffer where to read */
+	os_offset_t	offset,	/*!< in: file offset where to read */
+	ulint		n,	/*!< in: number of bytes to read */
+	const char*	src_file,/*!< in: file name where func invoked */
+	ulint		src_line)/*!< in: line where the func invoked */
+{
+	ibool	result;
+	struct PSI_file_locker*	locker = NULL;
+	PSI_file_locker_state	state;
+
+	register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ,
+				   src_file, src_line);
+
+	result = os_file_read_func(file, buf, offset, n);
+	/* n is reported to the end hook whether or not the read succeeded */
+	register_pfs_file_io_end(locker, n);
+
+	return(result);
+}
+
+/*******************************************************************//**
+NOTE! Please use the corresponding macro
+os_file_read_no_error_handling(), not directly this function!
+This is the performance schema instrumented wrapper function for
+os_file_read_no_error_handling() which requests a synchronous
+positioned read operation. This function does not do any error
+handling. In case of error it returns FALSE.
+@return TRUE if request was successful, FALSE if fail */
+UNIV_INLINE
+ibool
+pfs_os_file_read_no_error_handling_func(
+/*====================================*/
+	os_file_t	file,	/*!< in: handle to a file */
+	void*		buf,	/*!< in: buffer where to read */
+	os_offset_t	offset,	/*!< in: file offset where to read */
+	ulint		n,	/*!< in: number of bytes to read */
+	const char*	src_file,/*!< in: file name where func invoked */
+	ulint		src_line)/*!< in: line where the func invoked */
+{
+	ibool	result;
+	struct PSI_file_locker*	locker = NULL;
+	PSI_file_locker_state	state;
+
+	register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ,
+				   src_file, src_line);
+
+	result = os_file_read_no_error_handling_func(file, buf, offset, n);
+	/* n is reported to the end hook whether or not the read succeeded */
+	register_pfs_file_io_end(locker, n);
+
+	return(result);
+}
+
+/*******************************************************************//**
+NOTE! Please use the corresponding macro os_file_write(), not directly
+this function!
+This is the performance schema instrumented wrapper function for
+os_file_write() which requests a synchronous write operation.
+@return TRUE if request was successful, FALSE if fail */
+UNIV_INLINE
+ibool
+pfs_os_file_write_func(
+/*===================*/
+	const char*	name,	/*!< in: name of the file or path as a
+				null-terminated string */
+	os_file_t	file,	/*!< in: handle to a file */
+	const void*	buf,	/*!< in: buffer from which to write */
+	os_offset_t	offset,	/*!< in: file offset where to write */
+	ulint		n,	/*!< in: number of bytes to write */
+	const char*	src_file,/*!< in: file name where func invoked */
+	ulint		src_line)/*!< in: line where the func invoked */
+{
+	ibool	result;
+	struct PSI_file_locker*	locker = NULL;
+	PSI_file_locker_state	state;
+
+	register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_WRITE,
+				   src_file, src_line);
+
+	result = os_file_write_func(name, file, buf, offset, n);
+	/* n is reported to the end hook whether or not the write succeeded */
+	register_pfs_file_io_end(locker, n);
+
+	return(result);
+}
+
+/***********************************************************************//**
+NOTE! Please use the corresponding macro os_file_flush(), not directly
+this function!
+This is the performance schema instrumented wrapper function for
+os_file_flush() which flushes the write buffers of a given file to the disk.
+@return TRUE if success */
+UNIV_INLINE
+ibool
+pfs_os_file_flush_func(
+/*===================*/
+	os_file_t	file,	/*!< in, own: handle to a file */
+	const char*	src_file,/*!< in: file name where func invoked */
+	ulint		src_line)/*!< in: line where the func invoked */
+{
+	ibool	result;
+	struct PSI_file_locker*	locker = NULL;
+	PSI_file_locker_state	state;
+
+	register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_SYNC,
+				   src_file, src_line);
+	result = os_file_flush_func(file);
+	/* PSI_FILE_SYNC event: 0 is reported as the byte count */
+	register_pfs_file_io_end(locker, 0);
+
+	return(result);
+}
+
+/***********************************************************************//**
+NOTE! Please use the corresponding macro os_file_rename(), not directly
+this function!
+This is the performance schema instrumented wrapper function for
+os_file_rename()
+@return TRUE if success */
+UNIV_INLINE
+ibool
+pfs_os_file_rename_func(
+/*====================*/
+	mysql_pfs_key_t key,	/*!< in: Performance Schema Key */
+	const char*	oldpath,/*!< in: old file path as a null-terminated
+				string */
+	const char*	newpath,/*!< in: new file path */
+	const char*	src_file,/*!< in: file name where func invoked */
+	ulint		src_line)/*!< in: line where the func invoked */
+{
+	ibool	result;
+	struct PSI_file_locker*	locker = NULL;
+	PSI_file_locker_state	state;
+
+	register_pfs_file_open_begin(&state, locker, key, PSI_FILE_RENAME, newpath,
+				     src_file, src_line);
+
+	result = os_file_rename_func(oldpath, newpath);
+	/* the event is keyed on newpath; 0 is passed as the file argument */
+	register_pfs_file_open_end(locker, 0);
+
+	return(result);
+}
+
+/***********************************************************************//**
+NOTE! Please use the corresponding macro os_file_delete(), not directly
+this function!
+This is the performance schema instrumented wrapper function for
+os_file_delete()
+@return true if success */
+UNIV_INLINE
+bool
+pfs_os_file_delete_func(
+/*====================*/
+	mysql_pfs_key_t key,		/*!< in: Performance Schema Key */
+	const char*	name,		/*!< in: file path as a null-terminated
+					string */
+	const char*	src_file,	/*!< in: file name where func invoked */
+	ulint		src_line)	/*!< in: line where the func invoked */
+{
+	bool	result;
+	struct PSI_file_locker*	locker = NULL;
+	PSI_file_locker_state	state;
+
+	register_pfs_file_close_begin(&state, locker, key, PSI_FILE_DELETE,
+				      name, src_file, src_line);
+
+	result = os_file_delete_func(name);
+	/* finish the PSI_FILE_DELETE event started above */
+	register_pfs_file_close_end(locker, 0);
+
+	return(result);
+}
+
+/***********************************************************************//**
+NOTE! Please use the corresponding macro os_file_delete_if_exists(), not
+directly this function!
+This is the performance schema instrumented wrapper function for
+os_file_delete_if_exists()
+@return true if success */
+UNIV_INLINE
+bool
+pfs_os_file_delete_if_exists_func(
+/*==============================*/
+	mysql_pfs_key_t key,		/*!< in: Performance Schema Key */
+	const char*	name,		/*!< in: file path as a null-terminated
+					string */
+	const char*	src_file,	/*!< in: file name where func invoked */
+	ulint		src_line)	/*!< in: line where the func invoked */
+{
+	bool	result;
+	struct PSI_file_locker*	locker = NULL;
+	PSI_file_locker_state	state;
+
+	register_pfs_file_close_begin(&state, locker, key, PSI_FILE_DELETE,
+				      name, src_file, src_line);
+
+	result = os_file_delete_if_exists_func(name);
+	/* finish the PSI_FILE_DELETE event started above */
+	register_pfs_file_close_end(locker, 0);
+
+	return(result);
+}
+#endif /* UNIV_PFS_IO */
diff --git a/storage/innobase/include/os0once.h b/storage/innobase/include/os0once.h
new file mode 100644
index 00000000000..a8bbaf1d2d4
--- /dev/null
+++ b/storage/innobase/include/os0once.h
@@ -0,0 +1,125 @@
+/*****************************************************************************
+
+Copyright (c) 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/os0once.h
+A class that aids executing a given function exactly once in a multi-threaded
+environment.
+
+Created Feb 20, 2014 Vasil Dimov
+*******************************************************/
+
+#ifndef os0once_h
+#define os0once_h
+
+#include "univ.i"
+
+#include "os0sync.h"
+#include "ut0ut.h"
+
+/** Execute a given function exactly once in a multi-threaded environment
+or wait for the function to be executed by another thread.
+
+Example usage:
+First the user must create a control variable of type os_once::state_t and
+assign it os_once::NEVER_DONE.
+Then the user must pass this variable, together with a function to be
+executed to os_once::do_or_wait_for_done().
+
+Multiple threads can call os_once::do_or_wait_for_done() simultaneously with
+the same (os_once::state_t) control variable. The provided function will be
+called exactly once and when os_once::do_or_wait_for_done() returns then this
+function has completed execution, by this or another thread. In other words
+os_once::do_or_wait_for_done() will either execute the provided function or
+will wait for its execution to complete if it is already called by another
+thread or will do nothing if the function has already completed its execution
+earlier.
+
+This mimics pthread_once(3), but unfortunately pthread_once(3) does not
+support passing arguments to the init_routine() function. We should use
+std::call_once() when we start compiling with C++11 enabled. */
+class os_once {
+public:
+	/** Control variables' state type */
+	typedef ib_uint32_t	state_t;
+
+	/** Not yet executed. */
+	static const state_t	NEVER_DONE = 0;
+
+	/** Currently being executed by this or another thread. */
+	static const state_t	IN_PROGRESS = 1;
+
+	/** Finished execution. */
+	static const state_t	DONE = 2;
+
+#ifdef HAVE_ATOMIC_BUILTINS
+	/** Call a given function or wait for its execution to complete if it
+	is already called by another thread.
+	@param[in,out]	state		control variable
+	@param[in]	do_func		function to call
+	@param[in,out]	do_func_arg	an argument to pass to do_func(). */
+	static
+	void
+	do_or_wait_for_done(
+		volatile state_t*	state,
+		void			(*do_func)(void*),
+		void*			do_func_arg)
+	{
+		/* Fast path: skip the compare-and-swap if already DONE.
+		NOTE(review): volatile read only; confirm memory ordering. */
+		if (*state == DONE) {
+			return;
+		}
+
+		if (os_compare_and_swap_uint32(state,
+					       NEVER_DONE, IN_PROGRESS)) {
+			/* We are the first. Call the function. */
+
+			do_func(do_func_arg);
+
+			const bool	swapped = os_compare_and_swap_uint32(
+				state, IN_PROGRESS, DONE);
+
+			ut_a(swapped);
+		} else {
+			/* The state is not NEVER_DONE, so either it is
+			IN_PROGRESS (somebody is calling the function right
+			now) or DONE (it has already been called and
+			completed). Wait for it to become DONE. */
+			for (;;) {
+				const state_t	s = *state;
+
+				switch (s) {
+				case DONE:
+					return;
+				case IN_PROGRESS:
+					break;
+				case NEVER_DONE:
+					/* fall through */
+				default:
+					ut_error;
+				}
+
+				UT_RELAX_CPU();
+			}
+		}
+	}
+#endif /* HAVE_ATOMIC_BUILTINS */
+};
+
+#endif /* os0once_h */
diff --git a/storage/innobase/include/os0proc.h b/storage/innobase/include/os0proc.h
new file mode 100644
index 00000000000..613e3bd6947
--- /dev/null
+++ b/storage/innobase/include/os0proc.h
@@ -0,0 +1,77 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/os0proc.h
+The interface to the operating system
+process control primitives
+
+Created 9/30/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef os0proc_h
+#define os0proc_h
+
+#include "univ.i"
+
+#ifdef UNIV_LINUX
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#endif
+
+typedef void*			os_process_t;
+typedef unsigned long int	os_process_id_t;
+
+extern ibool os_use_large_pages;
+/* Large page size. This may be a boot-time option on some platforms */
+extern ulint os_large_page_size;
+
+/****************************************************************//**
+Converts the current process id to a number. It is not guaranteed that the
+number is unique. In Linux returns the 'process number' of the current
+thread. That number is the same as one sees in 'top', for example. In Linux
+the thread id is not the same as one sees in 'top'.
+@return	process id as a number */
+UNIV_INTERN
+ulint
+os_proc_get_number(void);
+/*====================*/
+/****************************************************************//**
+Allocates large pages memory.
+@return	allocated memory */
+UNIV_INTERN
+void*
+os_mem_alloc_large(
+/*===============*/
+	ulint*	n);			/*!< in/out: number of bytes */
+/****************************************************************//**
+Frees large pages memory. */
+UNIV_INTERN
+void
+os_mem_free_large(
+/*==============*/
+	void	*ptr,			/*!< in: pointer returned by
+					os_mem_alloc_large() */
+	ulint	size);			/*!< in: size returned by
+					os_mem_alloc_large() */
+
+#ifndef UNIV_NONINL
+#include "os0proc.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/os0proc.ic b/storage/innobase/include/os0proc.ic
new file mode 100644
index 00000000000..506f4f8ce0c
--- /dev/null
+++ b/storage/innobase/include/os0proc.ic
@@ -0,0 +1,27 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/os0proc.ic
+The interface to the operating system
+process control primitives
+
+Created 9/30/1995 Heikki Tuuri
+*******************************************************/
+
+
diff --git a/storage/innobase/include/os0sync.h b/storage/innobase/include/os0sync.h
new file mode 100644
index 00000000000..57b29fff663
--- /dev/null
+++ b/storage/innobase/include/os0sync.h
@@ -0,0 +1,743 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/os0sync.h
+The interface to the operating system
+synchronization primitives.
+
+Created 9/6/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef os0sync_h
+#define os0sync_h
+
+#include "univ.i"
+#include "ut0lst.h"
+#include "sync0types.h"
+
+#ifdef __WIN__
+/** Native event (slow)*/
+typedef HANDLE			os_native_event_t;
+/** Native mutex */
+typedef CRITICAL_SECTION	fast_mutex_t;
+/** Native condition variable. */
+typedef CONDITION_VARIABLE	os_cond_t;
+#else
+/** Native mutex */
+typedef pthread_mutex_t		fast_mutex_t;
+/** Native condition variable */
+typedef pthread_cond_t		os_cond_t;
+#endif
+
+/** Structure that includes Performance Schema Probe pfs_psi
+in the os_fast_mutex structure if UNIV_PFS_MUTEX is defined */
+struct os_fast_mutex_t {
+	fast_mutex_t		mutex;	/*!< os_fast_mutex */
+#ifdef UNIV_PFS_MUTEX
+	struct PSI_mutex*	pfs_psi;/*!< The performance schema
+					instrumentation hook */
+#endif
+};
+
+/** Operating system event handle */
+typedef struct os_event*	os_event_t;
+
+/** An asynchronous signal sent between threads */
+struct os_event {
+#ifdef __WIN__
+	HANDLE		handle;		/*!< kernel event object, slow,
+					used on older Windows */
+#endif
+	os_fast_mutex_t	os_mutex;	/*!< this mutex protects the next
+					fields */
+	ibool		is_set;		/*!< this is TRUE when the event is
+					in the signaled state, i.e., a thread
+					does not stop if it tries to wait for
+					this event */
+	ib_int64_t	signal_count;	/*!< this is incremented each time
+					the event becomes signaled */
+	os_cond_t	cond_var;	/*!< condition variable is used in
+					waiting for the event */
+	UT_LIST_NODE_T(os_event_t) os_event_list;
+					/*!< list of all created events */
+};
+
+/** Denotes an infinite delay for os_event_wait_time() */
+#define OS_SYNC_INFINITE_TIME   ULINT_UNDEFINED
+
+/** Return value of os_event_wait_time() when the time is exceeded */
+#define OS_SYNC_TIME_EXCEEDED   1
+
+/** Operating system mutex handle */
+typedef struct os_mutex_t*	os_ib_mutex_t;
+
+/** Mutex protecting counts and the event and OS 'slow' mutex lists */
+extern os_ib_mutex_t	os_sync_mutex;
+
+/** This is incremented by 1 in os_thread_create and decremented by 1 in
+os_thread_exit */
+extern ulint		os_thread_count;
+
+extern ulint		os_event_count;
+extern ulint		os_mutex_count;
+extern ulint		os_fast_mutex_count;
+
+/*********************************************************//**
+Initializes global event and OS 'slow' mutex lists. */
+UNIV_INTERN
+void
+os_sync_init(void);
+/*==============*/
+/*********************************************************//**
+Frees created events and OS 'slow' mutexes. */
+UNIV_INTERN
+void
+os_sync_free(void);
+/*==============*/
+/*********************************************************//**
+Creates an event semaphore, i.e., a semaphore which may just have two states:
+signaled and nonsignaled. The created event is manual reset: it must be reset
+explicitly by calling os_event_reset().
+@return	the event handle */
+UNIV_INTERN
+os_event_t
+os_event_create(void);
+/*==================*/
+/**********************************************************//**
+Sets an event semaphore to the signaled state: lets waiting threads
+proceed. */
+UNIV_INTERN
+void
+os_event_set(
+/*=========*/
+	os_event_t	event);	/*!< in: event to set */
+/**********************************************************//**
+Resets an event semaphore to the nonsignaled state. Waiting threads will
+stop to wait for the event.
+The return value should be passed to os_event_wait_low() if it is desired
+that this thread should not wait in case of an intervening call to
+os_event_set() between this os_event_reset() and the
+os_event_wait_low() call. See comments for os_event_wait_low(). */
+UNIV_INTERN
+ib_int64_t
+os_event_reset(
+/*===========*/
+	os_event_t	event);	/*!< in: event to reset */
+/**********************************************************//**
+Frees an event object. */
+UNIV_INTERN
+void
+os_event_free(
+/*==========*/
+	os_event_t	event);	/*!< in: event to free */
+
+/**********************************************************//**
+Waits for an event object until it is in the signaled state.
+
+Typically, if the event has been signalled after the os_event_reset()
+we'll return immediately because event->is_set == TRUE.
+There are, however, situations (e.g.: sync_array code) where we may
+lose this information. For example:
+
+thread A calls os_event_reset()
+thread B calls os_event_set()   [event->is_set == TRUE]
+thread C calls os_event_reset() [event->is_set == FALSE]
+thread A calls os_event_wait()  [infinite wait!]
+thread C calls os_event_wait()  [infinite wait!]
+
+Where such a scenario is possible, to avoid infinite wait, the
+value returned by os_event_reset() should be passed in as
+reset_sig_count. */
+UNIV_INTERN
+void
+os_event_wait_low(
+/*==============*/
+	os_event_t	event,		/*!< in: event to wait */
+	ib_int64_t	reset_sig_count);/*!< in: zero or the value
+					returned by previous call of
+					os_event_reset(). */
+
+#define os_event_wait(event) os_event_wait_low(event, 0)
+#define os_event_wait_time(event, t) os_event_wait_time_low(event, t, 0)
+
+/**********************************************************//**
+Waits for an event object until it is in the signaled state or
+a timeout is exceeded. In Unix the timeout is always infinite.
+@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
+UNIV_INTERN
+ulint
+os_event_wait_time_low(
+/*===================*/
+	os_event_t	event,			/*!< in: event to wait */
+	ulint		time_in_usec,		/*!< in: timeout in
+						microseconds, or
+						OS_SYNC_INFINITE_TIME */
+	ib_int64_t	reset_sig_count);	/*!< in: zero or the value
+						returned by previous call of
+						os_event_reset(). */
+/*********************************************************//**
+Creates an operating system mutex semaphore. Because these are slow, the
+mutex semaphore of InnoDB itself (ib_mutex_t) should be used where possible.
+@return	the mutex handle */
+UNIV_INTERN
+os_ib_mutex_t
+os_mutex_create(void);
+/*=================*/
+/**********************************************************//**
+Acquires ownership of a mutex semaphore. */
+UNIV_INTERN
+void
+os_mutex_enter(
+/*===========*/
+	os_ib_mutex_t	mutex);	/*!< in: mutex to acquire */
+/**********************************************************//**
+Releases ownership of a mutex. */
+UNIV_INTERN
+void
+os_mutex_exit(
+/*==========*/
+	os_ib_mutex_t	mutex);	/*!< in: mutex to release */
+/**********************************************************//**
+Frees a mutex object. */
+UNIV_INTERN
+void
+os_mutex_free(
+/*==========*/
+	os_ib_mutex_t	mutex);	/*!< in: mutex to free */
+/**********************************************************//**
+Tries to acquire ownership of a fast mutex without blocking. In Windows this
+is implemented with TryEnterCriticalSection().
+@return	0 if success, != 0 if was reserved by another thread */
+UNIV_INLINE
+ulint
+os_fast_mutex_trylock(
+/*==================*/
+	os_fast_mutex_t*	fast_mutex);	/*!< in: mutex to acquire */
+
+/**********************************************************************
+Following os_fast_ mutex APIs would be performance schema instrumented:
+
+os_fast_mutex_init
+os_fast_mutex_lock
+os_fast_mutex_unlock
+os_fast_mutex_free
+
+These mutex APIs will point to corresponding wrapper functions that contain
+the performance schema instrumentation.
+
+NOTE! The following macro should be used in mutex operation, not the
+corresponding function. */
+
+#ifdef UNIV_PFS_MUTEX
+# define os_fast_mutex_init(K, M)			\
+	pfs_os_fast_mutex_init(K, M)
+
+# define os_fast_mutex_lock(M)				\
+	pfs_os_fast_mutex_lock(M, __FILE__, __LINE__)
+
+# define os_fast_mutex_unlock(M)	pfs_os_fast_mutex_unlock(M)
+
+# define os_fast_mutex_free(M)		pfs_os_fast_mutex_free(M)
+
+/*********************************************************//**
+NOTE! Please use the corresponding macro os_fast_mutex_init(), not directly
+this function!
+A wrapper function for os_fast_mutex_init_func(). Initializes an operating
+system fast mutex semaphore. */
+UNIV_INLINE
+void
+pfs_os_fast_mutex_init(
+/*===================*/
+	PSI_mutex_key		key,		/*!< in: Performance Schema
+						key */
+	os_fast_mutex_t*	fast_mutex);	/*!< out: fast mutex */
+/**********************************************************//**
+NOTE! Please use the corresponding macro os_fast_mutex_free(), not directly
+this function!
+Wrapper function for os_fast_mutex_free_func(). Also destroys the performance
+schema probes when freeing the mutex */
+UNIV_INLINE
+void
+pfs_os_fast_mutex_free(
+/*===================*/
+	os_fast_mutex_t*	fast_mutex);	/*!< in/out: mutex to free */
+/**********************************************************//**
+NOTE! Please use the corresponding macro os_fast_mutex_lock, not directly
+this function!
+Wrapper function of os_fast_mutex_lock. Acquires ownership of a fast mutex. */
+UNIV_INLINE
+void
+pfs_os_fast_mutex_lock(
+/*===================*/
+	os_fast_mutex_t*	fast_mutex,	/*!< in/out: mutex to acquire */
+	const char*		file_name,	/*!< in: file name where
+						 locked */
+	ulint			line);		/*!< in: line where locked */
+/**********************************************************//**
+NOTE! Please use the corresponding macro os_fast_mutex_unlock, not directly
+this function!
+Wrapper function of os_fast_mutex_unlock. Releases ownership of a fast mutex. */
+UNIV_INLINE
+void
+pfs_os_fast_mutex_unlock(
+/*=====================*/
+	os_fast_mutex_t*	fast_mutex);	/*!< in/out: mutex to release */
+
+#else /* UNIV_PFS_MUTEX */
+
+# define os_fast_mutex_init(K, M)			\
+	os_fast_mutex_init_func(&((os_fast_mutex_t*)(M))->mutex)
+
+# define os_fast_mutex_lock(M)				\
+	os_fast_mutex_lock_func(&((os_fast_mutex_t*)(M))->mutex)
+
+# define os_fast_mutex_unlock(M)			\
+	os_fast_mutex_unlock_func(&((os_fast_mutex_t*)(M))->mutex)
+
+# define os_fast_mutex_free(M)				\
+	os_fast_mutex_free_func(&((os_fast_mutex_t*)(M))->mutex)
+#endif /* UNIV_PFS_MUTEX */
+
+/**********************************************************//**
+Releases ownership of a fast mutex. */
+UNIV_INTERN
+void
+os_fast_mutex_unlock_func(
+/*======================*/
+	fast_mutex_t*		fast_mutex);	/*!< in: mutex to release */
+/*********************************************************//**
+Initializes an operating system fast mutex semaphore. */
+UNIV_INTERN
+void
+os_fast_mutex_init_func(
+/*====================*/
+	fast_mutex_t*		fast_mutex);	/*!< in: fast mutex */
+/**********************************************************//**
+Acquires ownership of a fast mutex. */
+UNIV_INTERN
+void
+os_fast_mutex_lock_func(
+/*====================*/
+	fast_mutex_t*		fast_mutex);	/*!< in: mutex to acquire */
+/**********************************************************//**
+Frees a mutex object. */
+UNIV_INTERN
+void
+os_fast_mutex_free_func(
+/*====================*/
+	fast_mutex_t*		fast_mutex);	/*!< in: mutex to free */
+
+/**********************************************************//**
+Atomic compare-and-swap and increment for InnoDB. */
+
+#if defined(HAVE_IB_GCC_ATOMIC_BUILTINS)
+
+# define HAVE_ATOMIC_BUILTINS
+
+# ifdef HAVE_IB_GCC_ATOMIC_BUILTINS_BYTE
+#  define HAVE_ATOMIC_BUILTINS_BYTE
+# endif
+
+# ifdef HAVE_IB_GCC_ATOMIC_BUILTINS_64
+#  define HAVE_ATOMIC_BUILTINS_64
+# endif
+
+/**********************************************************//**
+Returns true if swapped, ptr is pointer to target, old_val is value to
+compare to, new_val is the value to swap in. */
+
+# define os_compare_and_swap(ptr, old_val, new_val) \
+	__sync_bool_compare_and_swap(ptr, old_val, new_val)
+
+# define os_compare_and_swap_ulint(ptr, old_val, new_val) \
+	os_compare_and_swap(ptr, old_val, new_val)
+
+# define os_compare_and_swap_lint(ptr, old_val, new_val) \
+	os_compare_and_swap(ptr, old_val, new_val)
+
+#  define os_compare_and_swap_uint32(ptr, old_val, new_val) \
+	os_compare_and_swap(ptr, old_val, new_val)
+
+# ifdef HAVE_IB_ATOMIC_PTHREAD_T_GCC
+#  define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
+	os_compare_and_swap(ptr, old_val, new_val)
+#  define INNODB_RW_LOCKS_USE_ATOMICS
+#  define IB_ATOMICS_STARTUP_MSG \
+	"Mutexes and rw_locks use GCC atomic builtins"
+# else /* HAVE_IB_ATOMIC_PTHREAD_T_GCC */
+#  define IB_ATOMICS_STARTUP_MSG \
+	"Mutexes use GCC atomic builtins, rw_locks do not"
+# endif /* HAVE_IB_ATOMIC_PTHREAD_T_GCC */
+
+/**********************************************************//**
+Returns the resulting value, ptr is pointer to target, amount is the
+amount of increment. */
+
+# define os_atomic_increment(ptr, amount) \
+	__sync_add_and_fetch(ptr, amount)
+
+# define os_atomic_increment_lint(ptr, amount) \
+	os_atomic_increment(ptr, amount)
+
+# define os_atomic_increment_uint32(ptr, amount ) \
+	os_atomic_increment(ptr, amount)
+
+# define os_atomic_increment_ulint(ptr, amount) \
+	os_atomic_increment(ptr, amount)
+
+# define os_atomic_increment_uint64(ptr, amount) \
+	os_atomic_increment(ptr, amount)
+
+/* Returns the resulting value, ptr is pointer to target, amount is the
+amount to decrement. */
+
+# define os_atomic_decrement(ptr, amount) \
+	__sync_sub_and_fetch(ptr, amount)
+
+# define os_atomic_decrement_uint32(ptr, amount) \
+	os_atomic_decrement(ptr, amount)
+
+# define os_atomic_decrement_lint(ptr, amount) \
+	os_atomic_decrement(ptr, amount)
+
+# define os_atomic_decrement_ulint(ptr, amount) \
+	os_atomic_decrement(ptr, amount)
+
+# define os_atomic_decrement_uint64(ptr, amount) \
+	os_atomic_decrement(ptr, amount)
+
+/**********************************************************//**
+Returns the old value of *ptr, atomically sets *ptr to new_val */
+
+# define os_atomic_test_and_set_byte(ptr, new_val) \
+	__sync_lock_test_and_set(ptr, (byte) new_val)
+
+# define os_atomic_test_and_set_ulint(ptr, new_val) \
+	__sync_lock_test_and_set(ptr, new_val)
+
+#elif defined(HAVE_IB_SOLARIS_ATOMICS)
+
+# define HAVE_ATOMIC_BUILTINS
+# define HAVE_ATOMIC_BUILTINS_BYTE
+# define HAVE_ATOMIC_BUILTINS_64
+
+/* If not compiling with GCC or GCC doesn't support the atomic
+intrinsics and running on Solaris >= 10 use Solaris atomics */
+
+# include <atomic.h>
+
+/**********************************************************//**
+Returns true if swapped, ptr is pointer to target, old_val is value to
+compare to, new_val is the value to swap in. */
+
+# define os_compare_and_swap_uint32(ptr, old_val, new_val) \
+	(atomic_cas_32(ptr, old_val, new_val) == old_val)
+
+# define os_compare_and_swap_ulint(ptr, old_val, new_val) \
+	(atomic_cas_ulong(ptr, old_val, new_val) == old_val)
+
+# define os_compare_and_swap_lint(ptr, old_val, new_val) \
+	((lint) atomic_cas_ulong((ulong_t*) ptr, old_val, new_val) == old_val)
+
+# ifdef HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS
+#  if SIZEOF_PTHREAD_T == 4
+#   define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
+	((pthread_t) atomic_cas_32(ptr, old_val, new_val) == old_val)
+#  elif SIZEOF_PTHREAD_T == 8
+#   define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
+	((pthread_t) atomic_cas_64(ptr, old_val, new_val) == old_val)
+#  else
+#   error "SIZEOF_PTHREAD_T != 4 or 8"
+#  endif /* SIZEOF_PTHREAD_T CHECK */
+#  define INNODB_RW_LOCKS_USE_ATOMICS
+#  define IB_ATOMICS_STARTUP_MSG \
+	"Mutexes and rw_locks use Solaris atomic functions"
+# else /* HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS */
+#  define IB_ATOMICS_STARTUP_MSG \
+	"Mutexes use Solaris atomic functions, rw_locks do not"
+# endif /* HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS */
+
+/**********************************************************//**
+Returns the resulting value, ptr is pointer to target, amount is the
+amount of increment. */
+
+# define os_atomic_increment_uint32(ptr, amount) \
+	atomic_add_32_nv(ptr, amount)
+
+# define os_atomic_increment_ulint(ptr, amount) \
+	atomic_add_long_nv(ptr, amount)
+
+# define os_atomic_increment_lint(ptr, amount) \
+	os_atomic_increment_ulint((ulong_t*) ptr, amount)
+
+# define os_atomic_increment_uint64(ptr, amount) \
+	atomic_add_64_nv(ptr, amount)
+
+/* Returns the resulting value, ptr is pointer to target, amount is the
+amount to decrement. */
+
+# define os_atomic_decrement_uint32(ptr, amount) \
+	os_atomic_increment_uint32(ptr, -(amount))
+
+# define os_atomic_decrement_lint(ptr, amount) \
+	os_atomic_increment_ulint((ulong_t*) ptr, -(amount))
+
+# define os_atomic_decrement_ulint(ptr, amount) \
+	os_atomic_increment_ulint(ptr, -(amount))
+
+# define os_atomic_decrement_uint64(ptr, amount) \
+	os_atomic_increment_uint64(ptr, -(amount))
+
+/**********************************************************//**
+Returns the old value of *ptr, atomically sets *ptr to new_val */
+
+# define os_atomic_test_and_set_byte(ptr, new_val) \
+	atomic_swap_uchar(ptr, new_val)
+
+# define os_atomic_test_and_set_ulint(ptr, new_val) \
+	atomic_swap_ulong(ptr, new_val)
+
+#elif defined(HAVE_WINDOWS_ATOMICS)
+
+# define HAVE_ATOMIC_BUILTINS
+# define HAVE_ATOMIC_BUILTINS_BYTE
+
+# ifndef _WIN32
+#  define HAVE_ATOMIC_BUILTINS_64
+# endif
+
+/**********************************************************//**
+Atomic compare and exchange of signed integers (both 32 and 64 bit).
+@return value found before the exchange.
+If it is not equal to old_value the exchange did not happen. */
+UNIV_INLINE
+lint
+win_cmp_and_xchg_lint(
+/*==================*/
+	volatile lint*	ptr,		/*!< in/out: source/destination */
+	lint		new_val,	/*!< in: exchange value */
+	lint		old_val);	/*!< in: value to compare to */
+
+/**********************************************************//**
+Atomic addition of signed integers.
+@return Initial value of the variable pointed to by ptr */
+UNIV_INLINE
+lint
+win_xchg_and_add(
+/*=============*/
+	volatile lint*	ptr,	/*!< in/out: address of destination */
+	lint		val);	/*!< in: number to be added */
+
+/**********************************************************//**
+Atomic compare and exchange of unsigned integers.
+@return value found before the exchange.
+If it is not equal to old_value the exchange did not happen. */
+UNIV_INLINE
+ulint
+win_cmp_and_xchg_ulint(
+/*===================*/
+	volatile ulint*	ptr,		/*!< in/out: source/destination */
+	ulint		new_val,	/*!< in: exchange value */
+	ulint		old_val);	/*!< in: value to compare to */
+
+/**********************************************************//**
+Atomic compare and exchange of 32 bit unsigned integers.
+@return value found before the exchange.
+If it is not equal to old_value the exchange did not happen. */
+UNIV_INLINE
+DWORD
+win_cmp_and_xchg_dword(
+/*===================*/
+	volatile DWORD*	ptr,		/*!< in/out: source/destination */
+	DWORD		new_val,	/*!< in: exchange value */
+	DWORD		old_val);	/*!< in: value to compare to */
+
+/**********************************************************//**
+Returns true if swapped, ptr is pointer to target, old_val is value to
+compare to, new_val is the value to swap in. */
+
+# define os_compare_and_swap_uint32(ptr, old_val, new_val) \
+	(InterlockedCompareExchange(reinterpret_cast<volatile long*>(ptr), \
+				    new_val, old_val) == old_val)
+
+# define os_compare_and_swap_ulint(ptr, old_val, new_val) \
+	(win_cmp_and_xchg_ulint(ptr, new_val, old_val) == old_val)
+
+# define os_compare_and_swap_lint(ptr, old_val, new_val) \
+	(win_cmp_and_xchg_lint(ptr, new_val, old_val) == old_val)
+
+/* windows thread objects can always be passed to windows atomic functions */
+# define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
+	(win_cmp_and_xchg_dword(ptr, new_val, old_val) == old_val)
+
+# define INNODB_RW_LOCKS_USE_ATOMICS
+# define IB_ATOMICS_STARTUP_MSG \
+	"Mutexes and rw_locks use Windows interlocked functions"
+
+/**********************************************************//**
+Returns the resulting value, ptr is pointer to target, amount is the
+amount of increment. */
+
+# define os_atomic_increment_lint(ptr, amount) \
+	(win_xchg_and_add(ptr, amount) + amount)
+
+# define os_atomic_increment_uint32(ptr, amount) \
+	((ulint) (InterlockedExchangeAdd((long*) ptr, amount) + (amount)))
+
+# define os_atomic_increment_ulint(ptr, amount) \
+	((ulint) (win_xchg_and_add((lint*) ptr, (lint) amount) + amount))
+
+# define os_atomic_increment_uint64(ptr, amount)		\
+	((ib_uint64_t) (InterlockedExchangeAdd64(		\
+				(ib_int64_t*) ptr,		\
+				(ib_int64_t) amount) + amount))
+
+/**********************************************************//**
+Returns the resulting value, ptr is pointer to target, amount is the
+amount to decrement. There is no atomic subtract function on Windows */
+
+# define os_atomic_decrement_uint32(ptr, amount) ((ulint) \
+	(InterlockedExchangeAdd((long*) ptr, -(long) (amount)) - (amount)))
+
+# define os_atomic_decrement_lint(ptr, amount) \
+	(win_xchg_and_add(ptr, -(lint) amount) - amount)
+
+# define os_atomic_decrement_ulint(ptr, amount) \
+	((ulint) (win_xchg_and_add((lint*) ptr, -(lint) amount) - amount))
+
+# define os_atomic_decrement_uint64(ptr, amount)		\
+	((ib_uint64_t) (InterlockedExchangeAdd64(		\
+				(ib_int64_t*) ptr,		\
+				-(ib_int64_t) amount) - amount))
+
+/**********************************************************//**
+Returns the old value of *ptr, atomically sets *ptr to new_val.
+InterlockedExchange() operates on LONG, and the LONG will be
+clobbered */
+
+# define os_atomic_test_and_set_byte(ptr, new_val) \
+	((byte) InterlockedExchange(ptr, new_val))
+
+# define os_atomic_test_and_set_ulong(ptr, new_val) \
+	InterlockedExchange(ptr, new_val)
+
+#else
+# define IB_ATOMICS_STARTUP_MSG \
+	"Mutexes and rw_locks use InnoDB's own implementation"
+#endif
+#ifdef HAVE_ATOMIC_BUILTINS
+#define os_atomic_inc_ulint(m,v,d)	os_atomic_increment_ulint(v, d)
+#define os_atomic_dec_ulint(m,v,d)	os_atomic_decrement_ulint(v, d)
+#else
+#define os_atomic_inc_ulint(m,v,d)	os_atomic_inc_ulint_func(m, v, d)
+#define os_atomic_dec_ulint(m,v,d)	os_atomic_dec_ulint_func(m, v, d)
+#endif /* HAVE_ATOMIC_BUILTINS */
+
+/**********************************************************//**
+Following macros are used to update specified counter atomically
+if HAVE_ATOMIC_BUILTINS defined. Otherwise, use mutex passed in
+for synchronization */
+#ifdef HAVE_ATOMIC_BUILTINS
+#define os_increment_counter_by_amount(mutex, counter, amount)	\
+	(void) os_atomic_increment_ulint(&counter, amount)
+
+#define os_decrement_counter_by_amount(mutex, counter, amount)	\
+	(void) os_atomic_increment_ulint(&counter, (-((lint) amount)))
+#else
+#define os_increment_counter_by_amount(mutex, counter, amount)	\
+	do {							\
+		mutex_enter(&(mutex));				\
+		(counter) += (amount);				\
+		mutex_exit(&(mutex));				\
+	} while (0)
+
+#define os_decrement_counter_by_amount(mutex, counter, amount)	\
+	do {							\
+		mutex_enter(&(mutex));				\
+		ut_a((counter) >= (amount)); /* under mutex */	\
+		(counter) -= (amount);				\
+		mutex_exit(&(mutex));				\
+	} while (0)
+#endif  /* HAVE_ATOMIC_BUILTINS */
+
+#define os_inc_counter(mutex, counter)				\
+	os_increment_counter_by_amount(mutex, counter, 1)
+
+/* Decrements counter by 1. Like os_inc_counter, the expansion has no trailing
+semicolon, so the macro can be used in an unbraced if/else. */
+#define os_dec_counter(mutex, counter)				\
+	os_decrement_counter_by_amount(mutex, counter, 1)
+
+/** barrier definitions for memory ordering */
+#if defined __i386__ || defined __x86_64__ || defined _M_IX86 || defined _M_X64 || defined __WIN__
+/* Performance regression was observed at some conditions for Intel
+architecture. Disable memory barrier for Intel architecture for now. */
+# define os_rmb
+# define os_wmb
+# define IB_MEMORY_BARRIER_STARTUP_MSG \
+	"Memory barrier is not used"
+#elif defined(HAVE_IB_GCC_ATOMIC_THREAD_FENCE)
+# define HAVE_MEMORY_BARRIER
+# define os_rmb	__atomic_thread_fence(__ATOMIC_ACQUIRE)
+# define os_wmb	__atomic_thread_fence(__ATOMIC_RELEASE)
+# define IB_MEMORY_BARRIER_STARTUP_MSG \
+	"GCC builtin __atomic_thread_fence() is used for memory barrier"
+
+#elif defined(HAVE_IB_GCC_SYNC_SYNCHRONISE)
+# define HAVE_MEMORY_BARRIER
+# define os_rmb	__sync_synchronize()
+# define os_wmb	__sync_synchronize()
+# define IB_MEMORY_BARRIER_STARTUP_MSG \
+	"GCC builtin __sync_synchronize() is used for memory barrier"
+
+#elif defined(HAVE_IB_MACHINE_BARRIER_SOLARIS)
+# define HAVE_MEMORY_BARRIER
+# include <mbarrier.h>
+# define os_rmb	__machine_r_barrier()
+# define os_wmb	__machine_w_barrier()
+# define IB_MEMORY_BARRIER_STARTUP_MSG \
+	"Solaris memory ordering functions are used for memory barrier"
+
+#elif defined(HAVE_WINDOWS_MM_FENCE) && defined(_WIN64)
+# define HAVE_MEMORY_BARRIER
+# include <mmintrin.h>
+# define os_rmb	_mm_lfence()
+# define os_wmb	_mm_sfence()
+# define IB_MEMORY_BARRIER_STARTUP_MSG \
+	"_mm_lfence() and _mm_sfence() are used for memory barrier"
+
+#else
+# define os_rmb
+# define os_wmb
+# define IB_MEMORY_BARRIER_STARTUP_MSG \
+	"Memory barrier is not used"
+#endif
+
+#ifndef UNIV_NONINL
+#include "os0sync.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/os0sync.ic b/storage/innobase/include/os0sync.ic
new file mode 100644
index 00000000000..9a7e520ece6
--- /dev/null
+++ b/storage/innobase/include/os0sync.ic
@@ -0,0 +1,234 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/os0sync.ic
+The interface to the operating system synchronization primitives.
+
+Created 9/6/1995 Heikki Tuuri
+*******************************************************/
+
+#ifdef __WIN__
+#include <winbase.h>
+#endif
+
+/**********************************************************//**
+Attempts to acquire ownership of a fast mutex without waiting.
+@return	0 if success, != 0 if was reserved by another thread */
+UNIV_INLINE
+ulint
+os_fast_mutex_trylock(
+/*==================*/
+	os_fast_mutex_t*	fast_mutex)	/*!< in: mutex to acquire */
+{
+#ifdef __WIN__
+	/* TryEnterCriticalSection() is nonzero on success: invert it. */
+	const BOOL	entered = TryEnterCriticalSection(&fast_mutex->mutex);
+	return(!entered);
+#else
+	/* NOTE: MySQL's my_pthread.h redefines pthread_mutex_trylock so that
+	it returns 0 on success everywhere. Natively, HP-UX-10.20 (old Posix
+	1003.4a Draft 4) returns 1 on success, while Linux, FreeBSD, Solaris,
+	AIX, Tru64 Unix and HP-UX-11.0 return 0 on success. */
+	const int	rc = pthread_mutex_trylock(&fast_mutex->mutex);
+
+	return((ulint) rc);
+#endif
+}
+
+#ifdef UNIV_PFS_MUTEX
+/*********************************************************//**
+NOTE! Use the os_fast_mutex_init() macro rather than calling this function
+directly.
+Performance-schema wrapper around os_fast_mutex_init_func(): registers the
+mutex with the instrumentation, when available, and then initializes it. */
+UNIV_INLINE
+void
+pfs_os_fast_mutex_init(
+/*===================*/
+	PSI_mutex_key		key,		/*!< in: Performance Schema
+						key */
+	os_fast_mutex_t*	fast_mutex)	/*!< out: fast mutex */
+{
+	fast_mutex_t*	os_mutex = &fast_mutex->mutex;
+#ifdef HAVE_PSI_MUTEX_INTERFACE
+	fast_mutex->pfs_psi = PSI_MUTEX_CALL(init_mutex)(key, os_mutex);
+#else
+	fast_mutex->pfs_psi = NULL;
+#endif
+	os_fast_mutex_init_func(os_mutex);
+}
+/******************************************************************//**
+NOTE! Use the os_fast_mutex_free() macro rather than calling this function
+directly.
+Performance-schema wrapper around os_fast_mutex_free_func(): destroys the
+instrumentation attached to the mutex, if any, and then frees the mutex. */
+UNIV_INLINE
+void
+pfs_os_fast_mutex_free(
+/*===================*/
+	os_fast_mutex_t*	fast_mutex)  /*!< in/out: mutex */
+{
+#ifdef HAVE_PSI_MUTEX_INTERFACE
+	if (fast_mutex->pfs_psi != NULL) {
+		PSI_MUTEX_CALL(destroy_mutex)(fast_mutex->pfs_psi);
+	}
+#endif
+	fast_mutex->pfs_psi = NULL;
+	os_fast_mutex_free_func(&fast_mutex->mutex);
+}
+/**********************************************************//**
+NOTE! Use the os_fast_mutex_lock macro rather than calling this function
+directly.
+Performance-schema wrapper around os_fast_mutex_lock_func(): acquires the
+fast mutex and, if it is instrumented, reports the wait around the lock. */
+UNIV_INLINE
+void
+pfs_os_fast_mutex_lock(
+/*===================*/
+	os_fast_mutex_t*	fast_mutex,	/*!< in/out: mutex to acquire */
+	const char*		file_name,	/*!< in: file name where
+						 locked */
+	ulint			line)		/*!< in: line where locked */
+{
+#ifdef HAVE_PSI_MUTEX_INTERFACE
+	PSI_mutex_locker*	locker = NULL;
+	PSI_mutex_locker_state	state;
+
+	if (fast_mutex->pfs_psi != NULL) {
+		/* Report the start of the wait with the caller's location. */
+		locker = PSI_MUTEX_CALL(start_mutex_wait)(
+			&state, fast_mutex->pfs_psi,
+			PSI_MUTEX_LOCK, file_name,
+			static_cast<uint>(line));
+	}
+#endif /* HAVE_PSI_MUTEX_INTERFACE */
+
+	/* The mutex itself is taken whether or not it is instrumented. */
+	os_fast_mutex_lock_func(&fast_mutex->mutex);
+
+#ifdef HAVE_PSI_MUTEX_INTERFACE
+	/* locker stays NULL when the mutex is not instrumented or when
+	start_mutex_wait handed back NULL; then there is nothing to end. */
+	if (locker != NULL) {
+		PSI_MUTEX_CALL(end_mutex_wait)(locker, 0);
+	}
+#endif /* HAVE_PSI_MUTEX_INTERFACE */
+}
+/**********************************************************//**
+NOTE! Use the os_fast_mutex_unlock macro rather than calling this function
+directly.
+Performance-schema wrapper around os_fast_mutex_unlock_func(): notifies the
+instrumentation, if the mutex is instrumented, and then releases the mutex. */
+UNIV_INLINE
+void
+pfs_os_fast_mutex_unlock(
+/*=====================*/
+	os_fast_mutex_t*	fast_mutex)	/*!< in/out: mutex to release */
+{
+#ifdef HAVE_PSI_MUTEX_INTERFACE
+	if (fast_mutex->pfs_psi != NULL) {
+		PSI_MUTEX_CALL(unlock_mutex)(fast_mutex->pfs_psi);
+	}
+#endif
+	os_fast_mutex_unlock_func(&fast_mutex->mutex);
+}
+#endif /* UNIV_PFS_MUTEX */
+
+#ifdef HAVE_WINDOWS_ATOMICS
+
+/* Use inline functions to make 64 and 32 bit versions of windows atomic
+functions so that typecasts are evaluated at compile time. Take advantage
+that lint is either __int64 or long int and windows atomic functions work
+on __int64 and LONG */
+
+/**********************************************************//**
+Atomic compare-and-exchange of a signed integer (lint).
+@return value found before the exchange; if != old_val, no exchange was done */
+UNIV_INLINE
+lint
+win_cmp_and_xchg_lint(
+/*==================*/
+	volatile lint*	ptr,		/*!< in/out: source/destination */
+	lint		new_val,	/*!< in: exchange value */
+	lint		old_val)	/*!< in: value to compare to */
+{
+# ifdef _WIN64
+	const lint	found = InterlockedCompareExchange64(ptr, new_val, old_val);
+# else
+	const lint	found = InterlockedCompareExchange(ptr, new_val, old_val);
+# endif
+	return(found);
+}
+
+/**********************************************************//**
+Atomically adds a signed integer to the variable pointed to by ptr.
+@return the value the variable had before the addition */
+UNIV_INLINE
+lint
+win_xchg_and_add(
+/*=============*/
+	volatile lint*	ptr,	/*!< in/out: address of destination */
+	lint		val)	/*!< in: number to be added */
+{
+#ifndef _WIN64
+	return(InterlockedExchangeAdd(ptr, val));
+#else /* _WIN64 */
+	return(InterlockedExchangeAdd64(ptr, val));
+#endif /* !_WIN64 */
+}
+
+/**********************************************************//**
+Atomic compare-and-exchange of an unsigned integer (ulint), done by
+reinterpreting the operands as lint for win_cmp_and_xchg_lint().
+@return value found before the exchange; if != old_val, no exchange was done */
+UNIV_INLINE
+ulint
+win_cmp_and_xchg_ulint(
+/*===================*/
+	volatile ulint*	ptr,		/*!< in/out: source/destination */
+	ulint		new_val,	/*!< in: exchange value */
+	ulint		old_val)	/*!< in: value to compare to */
+{
+	volatile lint*	signed_ptr = reinterpret_cast<volatile lint*>(ptr);
+	const lint	found = win_cmp_and_xchg_lint(
+		signed_ptr, static_cast<lint>(new_val), static_cast<lint>(old_val));
+	return(static_cast<ulint>(found));
+}
+
+/**********************************************************//**
+Atomic compare-and-exchange of a 32-bit unsigned integer (DWORD), done by
+reinterpreting the operands as LONG for InterlockedCompareExchange().
+@return value found before the exchange; if != old_val, no exchange was done */
+UNIV_INLINE
+DWORD
+win_cmp_and_xchg_dword(
+/*===================*/
+	volatile DWORD*	ptr,		/*!< in/out: source/destination */
+	DWORD		new_val,	/*!< in: exchange value */
+	DWORD		old_val)	/*!< in: value to compare to */
+{
+	ut_ad(sizeof(DWORD) == sizeof(LONG));	/* The casts below rely on it. */
+	volatile LONG*	target = reinterpret_cast<volatile LONG*>(ptr);
+	const LONG	found = InterlockedCompareExchange(
+		target, static_cast<LONG>(new_val), static_cast<LONG>(old_val));
+	return(static_cast<DWORD>(found));
+}
+
+#endif /* HAVE_WINDOWS_ATOMICS */
+
diff --git a/storage/innobase/include/os0thread.h b/storage/innobase/include/os0thread.h
new file mode 100644
index 00000000000..37c54afae80
--- /dev/null
+++ b/storage/innobase/include/os0thread.h
@@ -0,0 +1,154 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/os0thread.h
+The interface to the operating system
+process and thread control primitives
+
+Created 9/8/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef os0thread_h
+#define os0thread_h
+
+#include "univ.i"
+
+/* Maximum number of threads which can be created in the program;
+this is also the size of the wait slot array for MySQL threads which
+can wait inside InnoDB */
+
+#define	OS_THREAD_MAX_N		srv_max_n_threads
+
+/* Possible fixed priorities for threads */
+#define OS_THREAD_PRIORITY_NONE		100
+#define OS_THREAD_PRIORITY_BACKGROUND	1
+#define OS_THREAD_PRIORITY_NORMAL	2
+#define OS_THREAD_PRIORITY_ABOVE_NORMAL	3
+
+#ifdef __WIN__
+typedef void*			os_thread_t;
+typedef DWORD			os_thread_id_t;	/*!< In Windows the thread id
+						is an unsigned long int */
+extern "C"  {
+typedef LPTHREAD_START_ROUTINE	os_thread_func_t;
+}
+
+/** Macro for specifying a Windows thread start function. */
+#define DECLARE_THREAD(func)	WINAPI func
+
+/** Required to get around a build error on Windows. Even though our functions
+are defined/declared as WINAPI f(LPVOID a); the compiler complains that they
+are defined as: os_thread_ret_t (__cdecl*)(void*). Because our functions
+don't access the arguments and don't return any value, we should be safe. */
+#define os_thread_create(f,a,i)	\
+	os_thread_create_func(reinterpret_cast<os_thread_func_t>(f), a, i)
+
+#else
+
+typedef pthread_t		os_thread_t;
+typedef os_thread_t		os_thread_id_t;	/*!< In Unix we use the thread
+						handle itself as the id of
+						the thread */
+extern "C"  { typedef void*	(*os_thread_func_t)(void*); }
+
+/** Macro for specifying a POSIX thread start function. */
+#define DECLARE_THREAD(func)	func
+#define os_thread_create(f,a,i)	os_thread_create_func(f, a, i)
+
+#endif /* __WIN__ */
+
+/* Define a function pointer type to use in a typecast */
+typedef void* (*os_posix_f_t) (void*);
+
+#ifdef HAVE_PSI_INTERFACE
+/* Define for performance schema registration key */
+typedef unsigned int    mysql_pfs_key_t;
+#endif
+
+/***************************************************************//**
+Compares two thread ids for equality.
+@return	TRUE if equal */
+UNIV_INTERN
+ibool
+os_thread_eq(
+/*=========*/
+	os_thread_id_t	a,	/*!< in: OS thread or thread id */
+	os_thread_id_t	b);	/*!< in: OS thread or thread id */
+/****************************************************************//**
+Converts an OS thread id to a ulint. It is NOT guaranteed that the ulint is
+unique for the thread though!
+@return	thread identifier as a number */
+UNIV_INTERN
+ulint
+os_thread_pf(
+/*=========*/
+	os_thread_id_t	a);	/*!< in: OS thread identifier */
+/****************************************************************//**
+Creates a new thread of execution. The execution starts from
+the function given. The start function takes a void* parameter; its exact
+signature is platform-specific, see os_thread_func_t.
+NOTE: We count the number of threads in os_thread_exit(). A created
+thread should always use that to exit and not use return() to exit.
+@return	handle to the thread */
+UNIV_INTERN
+os_thread_t
+os_thread_create_func(
+/*==================*/
+	os_thread_func_t	func,		/*!< in: pointer to function
+						from which to start */
+	void*			arg,		/*!< in: argument to start
+						function */
+	os_thread_id_t*		thread_id);	/*!< out: id of the created
+						thread, or NULL */
+
+/*****************************************************************//**
+Exits the current thread. */
+UNIV_INTERN
+void
+os_thread_exit(
+/*===========*/
+	void*	exit_value)	/*!< in: exit value; in Windows this void*
+				is cast as a DWORD */
+	UNIV_COLD __attribute__((noreturn));
+/*****************************************************************//**
+Returns the thread identifier of current thread.
+@return	current thread identifier */
+UNIV_INTERN
+os_thread_id_t
+os_thread_get_curr_id(void);
+/*========================*/
+/*****************************************************************//**
+Advises the os to give up remainder of the thread's time slice. */
+UNIV_INTERN
+void
+os_thread_yield(void);
+/*=================*/
+/*****************************************************************//**
+The thread sleeps at least the time given in microseconds. */
+UNIV_INTERN
+void
+os_thread_sleep(
+/*============*/
+	ulint	tm);	/*!< in: time in microseconds */
+
+#ifndef UNIV_NONINL
+#include "os0thread.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/os0thread.ic b/storage/innobase/include/os0thread.ic
new file mode 100644
index 00000000000..0622d22f2dc
--- /dev/null
+++ b/storage/innobase/include/os0thread.ic
@@ -0,0 +1,25 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/os0thread.ic
+The interface to the operating system
+process and thread control primitives
+
+Created 9/8/1995 Heikki Tuuri
+*******************************************************/
diff --git a/storage/innobase/include/page0cur.h b/storage/innobase/include/page0cur.h
new file mode 100644
index 00000000000..b1ad49b4915
--- /dev/null
+++ b/storage/innobase/include/page0cur.h
@@ -0,0 +1,387 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/page0cur.h
+The page cursor
+
+Created 10/4/1994 Heikki Tuuri
+*************************************************************************/
+
+#ifndef page0cur_h
+#define page0cur_h
+
+#include "univ.i"
+
+#include "buf0types.h"
+#include "page0page.h"
+#include "rem0rec.h"
+#include "data0data.h"
+#include "mtr0mtr.h"
+
+
+#define PAGE_CUR_ADAPT
+
+/* Page cursor search modes; the values must be in this order! */
+
+#define	PAGE_CUR_UNSUPP	0
+#define	PAGE_CUR_G	1
+#define	PAGE_CUR_GE	2
+#define	PAGE_CUR_L	3
+#define	PAGE_CUR_LE	4
+/*#define PAGE_CUR_LE_OR_EXTENDS 5*/ /* This is a search mode used in
+				 "column LIKE 'abc%' ORDER BY column DESC";
+				 we have to find strings which are <= 'abc' or
+				 which extend it */
+#ifdef UNIV_SEARCH_DEBUG
+# define PAGE_CUR_DBG	6	/* As PAGE_CUR_LE, but skips search shortcut */
+#endif /* UNIV_SEARCH_DEBUG */
+
+#ifdef UNIV_DEBUG
+/*********************************************************//**
+Gets pointer to the page frame where the cursor is positioned.
+@return	page */
+UNIV_INLINE
+page_t*
+page_cur_get_page(
+/*==============*/
+	page_cur_t*	cur);	/*!< in: page cursor */
+/*********************************************************//**
+Gets pointer to the buffer block where the cursor is positioned.
+@return	buffer block */
+UNIV_INLINE
+buf_block_t*
+page_cur_get_block(
+/*===============*/
+	page_cur_t*	cur);	/*!< in: page cursor */
+/*********************************************************//**
+Gets the compressed page descriptor of the block where the cursor is positioned.
+@return	compressed page descriptor */
+UNIV_INLINE
+page_zip_des_t*
+page_cur_get_page_zip(
+/*==================*/
+	page_cur_t*	cur);	/*!< in: page cursor */
+/*********************************************************//**
+Gets the record where the cursor is positioned.
+@return	record */
+UNIV_INLINE
+rec_t*
+page_cur_get_rec(
+/*=============*/
+	page_cur_t*	cur);	/*!< in: page cursor */
+#else /* UNIV_DEBUG */
+# define page_cur_get_page(cur)		page_align((cur)->rec)
+# define page_cur_get_block(cur)	(cur)->block
+# define page_cur_get_page_zip(cur)	buf_block_get_page_zip((cur)->block)
+# define page_cur_get_rec(cur)		(cur)->rec
+#endif /* UNIV_DEBUG */
+/*********************************************************//**
+Sets the cursor object to point before the first user record
+on the page. */
+UNIV_INLINE
+void
+page_cur_set_before_first(
+/*======================*/
+	const buf_block_t*	block,	/*!< in: index page */
+	page_cur_t*		cur);	/*!< in: cursor */
+/*********************************************************//**
+Sets the cursor object to point after the last user record on
+the page. */
+UNIV_INLINE
+void
+page_cur_set_after_last(
+/*====================*/
+	const buf_block_t*	block,	/*!< in: index page */
+	page_cur_t*		cur);	/*!< in: cursor */
+/*********************************************************//**
+Returns TRUE if the cursor is before first user record on page.
+@return	TRUE if at start */
+UNIV_INLINE
+ibool
+page_cur_is_before_first(
+/*=====================*/
+	const page_cur_t*	cur);	/*!< in: cursor */
+/*********************************************************//**
+Returns TRUE if the cursor is after last user record.
+@return	TRUE if at end */
+UNIV_INLINE
+ibool
+page_cur_is_after_last(
+/*===================*/
+	const page_cur_t*	cur);	/*!< in: cursor */
+/**********************************************************//**
+Positions the cursor on the given record. */
+UNIV_INLINE
+void
+page_cur_position(
+/*==============*/
+	const rec_t*		rec,	/*!< in: record on a page */
+	const buf_block_t*	block,	/*!< in: buffer block containing
+					the record */
+	page_cur_t*		cur);	/*!< out: page cursor */
+/**********************************************************//**
+Invalidates a page cursor by setting the record pointer NULL. */
+UNIV_INLINE
+void
+page_cur_invalidate(
+/*================*/
+	page_cur_t*	cur);	/*!< out: page cursor */
+/**********************************************************//**
+Moves the cursor to the next record on page. */
+UNIV_INLINE
+void
+page_cur_move_to_next(
+/*==================*/
+	page_cur_t*	cur);	/*!< in/out: cursor; must not be after last */
+/**********************************************************//**
+Moves the cursor to the previous record on page. */
+UNIV_INLINE
+void
+page_cur_move_to_prev(
+/*==================*/
+	page_cur_t*	cur);	/*!< in/out: cursor; not before first */
+#ifndef UNIV_HOTBACKUP
+/***********************************************************//**
+Inserts a record next to page cursor. Returns pointer to inserted record if
+succeed, i.e., enough space available, NULL otherwise. The cursor stays at
+the same logical position, but the physical position may change if it is
+pointing to a compressed page that was reorganized.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
+@return	pointer to record if succeed, NULL otherwise */
+UNIV_INLINE
+rec_t*
+page_cur_tuple_insert(
+/*==================*/
+	page_cur_t*	cursor,	/*!< in/out: a page cursor */
+	const dtuple_t*	tuple,	/*!< in: pointer to a data tuple */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	ulint**		offsets,/*!< out: offsets on *rec */
+	mem_heap_t**	heap,	/*!< in/out: pointer to memory heap, or NULL */
+	ulint		n_ext,	/*!< in: number of externally stored columns */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle, or NULL */
+	__attribute__((nonnull(1,2,3,4,5), warn_unused_result));
+#endif /* !UNIV_HOTBACKUP */
+/***********************************************************//**
+Inserts a record next to page cursor. Returns pointer to inserted record if
+succeed, i.e., enough space available, NULL otherwise. The cursor stays at
+the same logical position, but the physical position may change if it is
+pointing to a compressed page that was reorganized.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
+@return	pointer to record if succeed, NULL otherwise */
+UNIV_INLINE
+rec_t*
+page_cur_rec_insert(
+/*================*/
+	page_cur_t*	cursor,	/*!< in/out: a page cursor */
+	const rec_t*	rec,	/*!< in: record to insert */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	ulint*		offsets,/*!< in/out: rec_get_offsets(rec, index) */
+	mtr_t*		mtr);	/*!< in: mini-transaction handle, or NULL */
+/***********************************************************//**
+Inserts a record next to page cursor on an uncompressed page.
+Returns pointer to inserted record if succeed, i.e., enough
+space available, NULL otherwise. The cursor stays at the same position.
+@return	pointer to record if succeed, NULL otherwise */
+UNIV_INTERN
+rec_t*
+page_cur_insert_rec_low(
+/*====================*/
+	rec_t*		current_rec,/*!< in: pointer to current record after
+				which the new record is inserted */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	const rec_t*	rec,	/*!< in: pointer to a physical record */
+	ulint*		offsets,/*!< in/out: rec_get_offsets(rec, index) */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle, or NULL */
+	__attribute__((nonnull(1,2,3,4), warn_unused_result));
+/***********************************************************//**
+Inserts a record next to page cursor on a compressed and uncompressed
+page. Returns pointer to inserted record if succeed, i.e.,
+enough space available, NULL otherwise.
+The cursor stays at the same position.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
+@return	pointer to record if succeed, NULL otherwise */
+UNIV_INTERN
+rec_t*
+page_cur_insert_rec_zip(
+/*====================*/
+	page_cur_t*	cursor,	/*!< in/out: page cursor */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	const rec_t*	rec,	/*!< in: pointer to a physical record */
+	ulint*		offsets,/*!< in/out: rec_get_offsets(rec, index) */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle, or NULL */
+	__attribute__((nonnull(1,2,3,4), warn_unused_result));
+/*************************************************************//**
+Copies records from page to a newly created page, from a given record onward,
+including that record. Infimum and supremum records are not copied.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit(). */
+UNIV_INTERN
+void
+page_copy_rec_list_end_to_created_page(
+/*===================================*/
+	page_t*		new_page,	/*!< in/out: index page to copy to */
+	rec_t*		rec,		/*!< in: first record to copy */
+	dict_index_t*	index,		/*!< in: record descriptor */
+	mtr_t*		mtr);		/*!< in: mtr */
+/***********************************************************//**
+Deletes a record at the page cursor. The cursor is moved to the
+next record after the deleted one. */
+UNIV_INTERN
+void
+page_cur_delete_rec(
+/*================*/
+	page_cur_t*		cursor,	/*!< in/out: a page cursor */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	const ulint*		offsets,/*!< in: rec_get_offsets(
+					cursor->rec, index) */
+	mtr_t*			mtr);	/*!< in: mini-transaction handle */
+#ifndef UNIV_HOTBACKUP
+/****************************************************************//**
+Searches the right position for a page cursor.
+@return	number of matched fields on the left */
+UNIV_INLINE
+ulint
+page_cur_search(
+/*============*/
+	const buf_block_t*	block,	/*!< in: buffer block */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	const dtuple_t*		tuple,	/*!< in: data tuple */
+	ulint			mode,	/*!< in: PAGE_CUR_L,
+					PAGE_CUR_LE, PAGE_CUR_G, or
+					PAGE_CUR_GE */
+	page_cur_t*		cursor);/*!< out: page cursor */
+/****************************************************************//**
+Searches the right position for a page cursor. */
+UNIV_INTERN
+void
+page_cur_search_with_match(
+/*=======================*/
+	const buf_block_t*	block,	/*!< in: buffer block */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	const dtuple_t*		tuple,	/*!< in: data tuple */
+	ulint			mode,	/*!< in: PAGE_CUR_L,
+					PAGE_CUR_LE, PAGE_CUR_G, or
+					PAGE_CUR_GE */
+	ulint*			iup_matched_fields,
+					/*!< in/out: already matched
+					fields in upper limit record */
+	ulint*			iup_matched_bytes,
+					/*!< in/out: already matched
+					bytes in a field not yet
+					completely matched */
+	ulint*			ilow_matched_fields,
+					/*!< in/out: already matched
+					fields in lower limit record */
+	ulint*			ilow_matched_bytes,
+					/*!< in/out: already matched
+					bytes in a field not yet
+					completely matched */
+	page_cur_t*		cursor);/*!< out: page cursor */
+/***********************************************************//**
+Positions a page cursor on a randomly chosen user record on a page. If there
+are no user records, sets the cursor on the infimum record. */
+UNIV_INTERN
+void
+page_cur_open_on_rnd_user_rec(
+/*==========================*/
+	buf_block_t*	block,	/*!< in: page */
+	page_cur_t*	cursor);/*!< out: page cursor */
+#endif /* !UNIV_HOTBACKUP */
+/***********************************************************//**
+Parses a log record of a record insert on a page.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+page_cur_parse_insert_rec(
+/*======================*/
+	ibool		is_short,/*!< in: TRUE if short inserts */
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	buf_block_t*	block,	/*!< in: page or NULL */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	mtr_t*		mtr);	/*!< in: mtr or NULL */
+/**********************************************************//**
+Parses a log record of copying a record list end to a newly created page.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+page_parse_copy_rec_list_to_created_page(
+/*=====================================*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	buf_block_t*	block,	/*!< in: page or NULL */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	mtr_t*		mtr);	/*!< in: mtr or NULL */
+/***********************************************************//**
+Parses log record of a record delete on a page.
+@return	pointer to record end or NULL */
+UNIV_INTERN
+byte*
+page_cur_parse_delete_rec(
+/*======================*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	buf_block_t*	block,	/*!< in: page or NULL */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	mtr_t*		mtr);	/*!< in: mtr or NULL */
+/*******************************************************//**
+Removes the record from a leaf page. This function does not log
+any changes. It is used by the IMPORT tablespace functions.
+@return	true if success, i.e., the page did not become too empty */
+UNIV_INTERN
+bool
+page_delete_rec(
+/*============*/
+	const dict_index_t*	index,	/*!< in: The index that the record
+					belongs to */
+	page_cur_t*		pcur,	/*!< in/out: page cursor on record
+					to delete */
+	page_zip_des_t*		page_zip,/*!< in: compressed page descriptor */
+	const ulint*		offsets);/*!< in: offsets for record */
+
+/** Index page cursor: a record position together with its buffer block */
+
+struct page_cur_t{
+	byte*		rec;	/*!< pointer to a record on page; set to NULL by page_cur_invalidate() */
+	buf_block_t*	block;	/*!< pointer to the block containing rec */
+};
+
+#ifndef UNIV_NONINL
+#include "page0cur.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/page0cur.ic b/storage/innobase/include/page0cur.ic
new file mode 100644
index 00000000000..028d33b17aa
--- /dev/null
+++ b/storage/innobase/include/page0cur.ic
@@ -0,0 +1,317 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/page0cur.ic
+The page cursor
+
+Created 10/4/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "page0page.h"
+#include "buf0types.h"
+
+#ifdef UNIV_DEBUG
+# include "rem0cmp.h"
+
+/*********************************************************//**
+Gets pointer to the page frame where the cursor is positioned.
+@return	start of the page frame that contains cur->rec */
+UNIV_INLINE
+page_t*
+page_cur_get_page(
+/*==============*/
+	page_cur_t*	cur)	/*!< in: page cursor */
+{
+	ut_ad(cur);
+	ut_ad(page_align(cur->rec) == cur->block->frame);
+	/* The frame is computed from the record pointer via page_align(). */
+	return(page_align(cur->rec));
+}
+
+/*********************************************************//**
+Gets pointer to the buffer block where the cursor is positioned.
+@return	buffer block (cur->block) */
+UNIV_INLINE
+buf_block_t*
+page_cur_get_block(
+/*===============*/
+	page_cur_t*	cur)	/*!< in: page cursor */
+{
+	ut_ad(cur);
+	ut_ad(page_align(cur->rec) == cur->block->frame);
+	return(cur->block);
+}
+
+/*********************************************************//**
+Gets the compressed page descriptor of the block the cursor is positioned on.
+@return	result of buf_block_get_page_zip() on the cursor's block */
+UNIV_INLINE
+page_zip_des_t*
+page_cur_get_page_zip(
+/*==================*/
+	page_cur_t*	cur)	/*!< in: page cursor */
+{
+	return(buf_block_get_page_zip(page_cur_get_block(cur)));
+}
+
+/*********************************************************//**
+Gets the record where the cursor is positioned.
+@return	record (cur->rec) */
+UNIV_INLINE
+rec_t*
+page_cur_get_rec(
+/*=============*/
+	page_cur_t*	cur)	/*!< in: page cursor */
+{
+	ut_ad(cur);
+	ut_ad(page_align(cur->rec) == cur->block->frame);
+	/* The assertion above checks that rec lies in the frame of cur->block. */
+	return(cur->rec);
+}
+#endif /* UNIV_DEBUG */
+
+/*********************************************************//**
+Sets the cursor to point before the first user record on the page. */
+UNIV_INLINE
+void
+page_cur_set_before_first(
+/*======================*/
+	const buf_block_t*	block,	/*!< in: index page */
+	page_cur_t*		cur)	/*!< out: cursor */
+{
+	buf_block_t*	blk = (buf_block_t*) block;
+	cur->rec = page_get_infimum_rec(buf_block_get_frame(blk));
+	cur->block = blk;
+}
+
+/*********************************************************//**
+Sets the cursor to point after the last user record on the page. */
+UNIV_INLINE
+void
+page_cur_set_after_last(
+/*====================*/
+	const buf_block_t*	block,	/*!< in: index page */
+	page_cur_t*		cur)	/*!< out: cursor */
+{
+	buf_block_t*	blk = (buf_block_t*) block;
+	cur->rec = page_get_supremum_rec(buf_block_get_frame(blk));
+	cur->block = blk;
+}
+
+/*********************************************************//**
+Returns TRUE if the cursor is before first user record on page.
+@return	TRUE if at start, i.e. cur->rec is the page infimum record */
+UNIV_INLINE
+ibool
+page_cur_is_before_first(
+/*=====================*/
+	const page_cur_t*	cur)	/*!< in: cursor */
+{
+	ut_ad(cur);
+	ut_ad(page_align(cur->rec) == cur->block->frame);
+	return(page_rec_is_infimum(cur->rec));
+}
+
+/*********************************************************//**
+Returns TRUE if the cursor is after last user record.
+@return	TRUE if at end, i.e. cur->rec is the page supremum record */
+UNIV_INLINE
+ibool
+page_cur_is_after_last(
+/*===================*/
+	const page_cur_t*	cur)	/*!< in: cursor */
+{
+	ut_ad(cur);
+	ut_ad(page_align(cur->rec) == cur->block->frame);
+	return(page_rec_is_supremum(cur->rec));
+}
+
+/**********************************************************//**
+Positions the cursor on the given record. */
+UNIV_INLINE
+void
+page_cur_position(
+/*==============*/
+	const rec_t*		rec,	/*!< in: record on a page */
+	const buf_block_t*	block,	/*!< in: buffer block containing
+					the record */
+	page_cur_t*		cur)	/*!< out: page cursor */
+{
+	ut_ad(rec && block && cur);
+	ut_ad(page_align(rec) == block->frame);
+	/* const is cast away below: the cursor stores non-const pointers. */
+	cur->rec = (rec_t*) rec;
+	cur->block = (buf_block_t*) block;
+}
+
+/**********************************************************//**
+Invalidates a page cursor by setting the record and block pointers NULL. */
+UNIV_INLINE
+void
+page_cur_invalidate(
+/*================*/
+	page_cur_t*	cur)	/*!< out: page cursor */
+{
+	ut_ad(cur);
+
+	cur->rec = NULL;
+	cur->block = NULL;
+}
+
+/**********************************************************//**
+Moves the cursor to the next record on page. */
+UNIV_INLINE
+void
+page_cur_move_to_next(
+/*==================*/
+	page_cur_t*	cur)	/*!< in/out: cursor; must not be after last */
+{
+	ut_ad(!page_cur_is_after_last(cur));
+	/* Only the record pointer moves; cur->block stays the same. */
+	cur->rec = page_rec_get_next(cur->rec);
+}
+
+/**********************************************************//**
+Moves the cursor to the previous record on page. */
+UNIV_INLINE
+void
+page_cur_move_to_prev(
+/*==================*/
+	page_cur_t*	cur)	/*!< in/out: page cursor, not before first */
+{
+	ut_ad(!page_cur_is_before_first(cur));
+	/* Only the record pointer moves; cur->block stays the same. */
+	cur->rec = page_rec_get_prev(cur->rec);
+}
+
+#ifndef UNIV_HOTBACKUP
+/****************************************************************//**
+Positions a page cursor for the given tuple and search mode.
+@return	number of matched fields on the left */
+UNIV_INLINE
+ulint
+page_cur_search(
+/*============*/
+	const buf_block_t*	block,	/*!< in: buffer block */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	const dtuple_t*		tuple,	/*!< in: data tuple */
+	ulint			mode,	/*!< in: PAGE_CUR_L,
+					PAGE_CUR_LE, PAGE_CUR_G, or
+					PAGE_CUR_GE */
+	page_cur_t*		cursor)	/*!< out: page cursor */
+{
+	/* Matched field/byte counters for the upper and the lower side of
+	the search position; all of them start from zero. */
+	ulint		hi_fields = 0, hi_bytes = 0;
+	ulint		lo_fields = 0, lo_bytes = 0;
+
+	ut_ad(dtuple_check_typed(tuple));
+
+	page_cur_search_with_match(
+		block, index, tuple, mode,
+		&hi_fields, &hi_bytes,
+		&lo_fields, &lo_bytes,
+		cursor);
+
+	return(lo_fields);
+}
+
+/***********************************************************//**
+Converts a data tuple to a physical record in *heap and inserts it next to
+the page cursor. Returns pointer to the inserted record if succeed, i.e.,
+enough space available, NULL otherwise. The cursor stays at the same logical
+position, but may physically move on a reorganized compressed page.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
+@return	pointer to record if succeed, NULL otherwise */
+UNIV_INLINE
+rec_t*
+page_cur_tuple_insert(
+/*==================*/
+	page_cur_t*	cursor,	/*!< in/out: a page cursor */
+	const dtuple_t*	tuple,	/*!< in: pointer to a data tuple */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	ulint**		offsets,/*!< out: offsets on *rec */
+	mem_heap_t**	heap,	/*!< in/out: memory heap; *heap may be NULL */
+	ulint		n_ext,	/*!< in: number of externally stored columns */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle, or NULL */
+{
+	ulint		size
+		= rec_get_converted_size(index, tuple, n_ext);
+	rec_t*		rec;
+	/* Create the heap on demand; initial size: record + offsets array. */
+	if (!*heap) {
+		*heap = mem_heap_create(size
+					+ (4 + REC_OFFS_HEADER_SIZE
+					   + dtuple_get_n_fields(tuple))
+					* sizeof **offsets);
+	}
+	/* Build the physical record in heap memory, then get its offsets. */
+	rec = rec_convert_dtuple_to_rec((byte*) mem_heap_alloc(*heap, size),
+					index, tuple, n_ext);
+	*offsets = rec_get_offsets(
+		rec, index, *offsets, ULINT_UNDEFINED, heap);
+	/* The zip variant takes the whole cursor, the other only cursor->rec. */
+	if (buf_block_get_page_zip(cursor->block)) {
+		rec = page_cur_insert_rec_zip(
+			cursor, index, rec, *offsets, mtr);
+	} else {
+		rec = page_cur_insert_rec_low(cursor->rec,
+					      index, rec, *offsets, mtr);
+	}
+	/* If inserted, cmp_dtuple_rec() of tuple and record must yield 0. */
+	ut_ad(!rec || !cmp_dtuple_rec(tuple, rec, *offsets));
+	return(rec);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/***********************************************************//**
+Inserts a physical record next to the page cursor. On success returns a
+pointer to the inserted record; returns NULL if there was not enough space.
+The cursor keeps its logical position, but its physical position may change
+if it points to a compressed page that got reorganized.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if this is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
+@return	pointer to record if succeed, NULL otherwise */
+UNIV_INLINE
+rec_t*
+page_cur_rec_insert(
+/*================*/
+	page_cur_t*	cursor,	/*!< in/out: a page cursor */
+	const rec_t*	rec,	/*!< in: record to insert */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	ulint*		offsets,/*!< in/out: rec_get_offsets(rec, index) */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle, or NULL */
+{
+	if (!buf_block_get_page_zip(cursor->block)) {
+		/* No compressed page descriptor: plain insert. */
+		return(page_cur_insert_rec_low(
+			       cursor->rec, index, rec, offsets, mtr));
+	}
+
+	return(page_cur_insert_rec_zip(cursor, index, rec, offsets, mtr));
+}
diff --git a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h
new file mode 100644
index 00000000000..b572f7abb49
--- /dev/null
+++ b/storage/innobase/include/page0page.h
@@ -0,0 +1,1122 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/page0page.h
+Index page routines
+
+Created 2/2/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef page0page_h
+#define page0page_h
+
+#include "univ.i"
+
+#include "page0types.h"
+#include "fil0fil.h"
+#include "buf0buf.h"
+#include "data0data.h"
+#include "dict0dict.h"
+#include "rem0rec.h"
+#include "fsp0fsp.h"
+#include "mtr0mtr.h"
+
+#ifdef UNIV_MATERIALIZE
+#undef UNIV_INLINE
+#define UNIV_INLINE
+#endif
+
+/*			PAGE HEADER
+			===========
+
+Index page header starts at the first offset left free by the FIL-module */
+
+typedef	byte		page_header_t;
+
+#define	PAGE_HEADER	FSEG_PAGE_DATA	/* index page header starts at this
+				offset */
+/*-----------------------------*/
+#define PAGE_N_DIR_SLOTS 0	/* number of slots in page directory */
+#define	PAGE_HEAP_TOP	 2	/* pointer to record heap top */
+#define	PAGE_N_HEAP	 4	/* number of records in the heap,
+				bit 15=flag: new-style compact page format */
+#define	PAGE_FREE	 6	/* pointer to start of page free record list */
+#define	PAGE_GARBAGE	 8	/* number of bytes in deleted records */
+#define	PAGE_LAST_INSERT 10	/* pointer to the last inserted record, or
+				NULL if this info has been reset by a delete,
+				for example */
+#define	PAGE_DIRECTION	 12	/* last insert direction: PAGE_LEFT, ... */
+#define	PAGE_N_DIRECTION 14	/* number of consecutive inserts to the same
+				direction */
+#define	PAGE_N_RECS	 16	/* number of user records on the page */
+#define PAGE_MAX_TRX_ID	 18	/* highest id of a trx which may have modified
+				a record on the page; trx_id_t; defined only
+				in secondary indexes and in the insert buffer
+				tree */
+#define PAGE_HEADER_PRIV_END 26	/* end of private data structure of the page
+				header which are set in a page create */
+/*----*/
+#define	PAGE_LEVEL	 26	/* level of the node in an index tree; the
+				leaf level is the level 0.  This field should
+				not be written to after page creation. */
+#define	PAGE_INDEX_ID	 28	/* index id where the page belongs.
+				This field should not be written to after
+				page creation. */
+#define PAGE_BTR_SEG_LEAF 36	/* file segment header for the leaf pages in
+				a B-tree: defined only on the root page of a
+				B-tree, but not in the root of an ibuf tree */
+#define PAGE_BTR_IBUF_FREE_LIST	PAGE_BTR_SEG_LEAF
+#define PAGE_BTR_IBUF_FREE_LIST_NODE PAGE_BTR_SEG_LEAF
+				/* in the place of PAGE_BTR_SEG_LEAF and _TOP
+				there is a free list base node if the page is
+				the root page of an ibuf tree, and at the same
+				place is the free list node if the page is in
+				a free list */
+#define PAGE_BTR_SEG_TOP (36 + FSEG_HEADER_SIZE)
+				/* file segment header for the non-leaf pages
+				in a B-tree: defined only on the root page of
+				a B-tree, but not in the root of an ibuf
+				tree */
+/*----*/
+#define PAGE_DATA	(PAGE_HEADER + 36 + 2 * FSEG_HEADER_SIZE)
+				/* start of data on the page */
+
+#define PAGE_OLD_INFIMUM	(PAGE_DATA + 1 + REC_N_OLD_EXTRA_BYTES)
+				/* offset of the page infimum record on an
+				old-style page */
+#define PAGE_OLD_SUPREMUM	(PAGE_DATA + 2 + 2 * REC_N_OLD_EXTRA_BYTES + 8)
+				/* offset of the page supremum record on an
+				old-style page */
+#define PAGE_OLD_SUPREMUM_END (PAGE_OLD_SUPREMUM + 9)
+				/* offset of the page supremum record end on
+				an old-style page */
+#define PAGE_NEW_INFIMUM	(PAGE_DATA + REC_N_NEW_EXTRA_BYTES)
+				/* offset of the page infimum record on a
+				new-style compact page */
+#define PAGE_NEW_SUPREMUM	(PAGE_DATA + 2 * REC_N_NEW_EXTRA_BYTES + 8)
+				/* offset of the page supremum record on a
+				new-style compact page */
+#define PAGE_NEW_SUPREMUM_END (PAGE_NEW_SUPREMUM + 8)
+				/* offset of the page supremum record end on
+				a new-style compact page */
+/*-----------------------------*/
+
+/* Heap numbers */
+#define PAGE_HEAP_NO_INFIMUM	0	/* page infimum */
+#define PAGE_HEAP_NO_SUPREMUM	1	/* page supremum */
+#define PAGE_HEAP_NO_USER_LOW	2	/* first user record in
+					creation (insertion) order,
+					not necessarily collation order;
+					this record may have been deleted */
+
+/* Directions of cursor movement */
+#define	PAGE_LEFT		1
+#define	PAGE_RIGHT		2
+#define	PAGE_SAME_REC		3
+#define	PAGE_SAME_PAGE		4
+#define	PAGE_NO_DIRECTION	5
+
+/*			PAGE DIRECTORY
+			==============
+*/
+
+typedef	byte			page_dir_slot_t;
+typedef page_dir_slot_t		page_dir_t;
+
+/* Offset of the directory start down from the page end. We call the
+slot with the highest file address directory start, as it points to
+the first record in the list of records. */
+#define	PAGE_DIR		FIL_PAGE_DATA_END
+
+/* We define a slot in the page directory as two bytes */
+#define	PAGE_DIR_SLOT_SIZE	2
+
+/* The offset of the physically lower end of the directory, counted from
+page end, when the page is empty */
+#define PAGE_EMPTY_DIR_START	(PAGE_DIR + 2 * PAGE_DIR_SLOT_SIZE)
+
+/* The maximum and minimum number of records owned by a directory slot. The
+number may drop below the minimum in the first and the last slot in the
+directory. */
+#define PAGE_DIR_SLOT_MAX_N_OWNED	8
+#define	PAGE_DIR_SLOT_MIN_N_OWNED	4
+
+/************************************************************//**
+Gets the start of a page.
+@return	start of the page */
+UNIV_INLINE
+page_t*
+page_align(
+/*=======*/
+	const void*	ptr)	/*!< in: pointer to page frame */
+		__attribute__((const));
+/************************************************************//**
+Gets the offset within a page.
+@return	offset from the start of the page */
+UNIV_INLINE
+ulint
+page_offset(
+/*========*/
+	const void*	ptr)	/*!< in: pointer to page frame */
+		__attribute__((const));
+/*************************************************************//**
+Returns the max trx id field value. */
+UNIV_INLINE
+trx_id_t
+page_get_max_trx_id(
+/*================*/
+	const page_t*	page);	/*!< in: page */
+/*************************************************************//**
+Sets the max trx id field value. */
+UNIV_INTERN
+void
+page_set_max_trx_id(
+/*================*/
+	buf_block_t*	block,	/*!< in/out: page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	trx_id_t	trx_id,	/*!< in: transaction id */
+	mtr_t*		mtr);	/*!< in/out: mini-transaction, or NULL */
+/*************************************************************//**
+Sets the max trx id field value if trx_id is bigger than the previous
+value. */
+UNIV_INLINE
+void
+page_update_max_trx_id(
+/*===================*/
+	buf_block_t*	block,	/*!< in/out: page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be updated, or NULL */
+	trx_id_t	trx_id,	/*!< in: transaction id */
+	mtr_t*		mtr);	/*!< in/out: mini-transaction */
+/*************************************************************//**
+Reads the given header field. */
+UNIV_INLINE
+ulint
+page_header_get_field(
+/*==================*/
+	const page_t*	page,	/*!< in: page */
+	ulint		field);	/*!< in: PAGE_N_DIR_SLOTS, ... */
+/*************************************************************//**
+Sets the given header field. */
+UNIV_INLINE
+void
+page_header_set_field(
+/*==================*/
+	page_t*		page,	/*!< in/out: page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be updated, or NULL */
+	ulint		field,	/*!< in: PAGE_N_DIR_SLOTS, ... */
+	ulint		val);	/*!< in: value */
+/*************************************************************//**
+Returns the offset stored in the given header field.
+@return	offset from the start of the page, or 0 */
+UNIV_INLINE
+ulint
+page_header_get_offs(
+/*=================*/
+	const page_t*	page,	/*!< in: page */
+	ulint		field)	/*!< in: PAGE_FREE, ... */
+	__attribute__((nonnull, pure));
+
+/*************************************************************//**
+Returns the pointer stored in the given header field, or NULL if it is 0. */
+#define page_header_get_ptr(page, field)			\
+	(page_header_get_offs(page, field)			\
+	 ? (page) + page_header_get_offs(page, field) : NULL)
+/*************************************************************//**
+Sets the pointer stored in the given header field. */
+UNIV_INLINE
+void
+page_header_set_ptr(
+/*================*/
+	page_t*		page,	/*!< in/out: page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be updated, or NULL */
+	ulint		field,	/*!< in: PAGE_FREE, ... */
+	const byte*	ptr);	/*!< in: pointer or NULL */
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
+Resets the last insert info field in the page header. Writes to mlog
+about this operation. */
+UNIV_INLINE
+void
+page_header_reset_last_insert(
+/*==========================*/
+	page_t*		page,	/*!< in: page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be updated, or NULL */
+	mtr_t*		mtr);	/*!< in: mtr */
+#endif /* !UNIV_HOTBACKUP */
+/************************************************************//**
+Gets the offset of the first record on the page.
+@return	offset of the first record in record list, relative from page */
+UNIV_INLINE
+ulint
+page_get_infimum_offset(
+/*====================*/
+	const page_t*	page);	/*!< in: page which must have record(s) */
+/************************************************************//**
+Gets the offset of the last record on the page.
+@return	offset of the last record in record list, relative from page */
+UNIV_INLINE
+ulint
+page_get_supremum_offset(
+/*=====================*/
+	const page_t*	page);	/*!< in: page which must have record(s) */
+#define page_get_infimum_rec(page) ((page) + page_get_infimum_offset(page))
+#define page_get_supremum_rec(page) ((page) + page_get_supremum_offset(page))
+
+/************************************************************//**
+Returns the nth record of the record list.
+This is the inverse function of page_rec_get_n_recs_before().
+@return	nth record */
+UNIV_INTERN
+const rec_t*
+page_rec_get_nth_const(
+/*===================*/
+	const page_t*	page,	/*!< in: page */
+	ulint		nth)	/*!< in: nth record */
+	__attribute__((nonnull, warn_unused_result));
+/************************************************************//**
+Returns the nth record of the record list.
+This is the inverse function of page_rec_get_n_recs_before().
+@return	nth record */
+UNIV_INLINE
+rec_t*
+page_rec_get_nth(
+/*=============*/
+	page_t*	page,	/*!< in: page */
+	ulint	nth)	/*!< in: nth record */
+	__attribute__((nonnull, warn_unused_result));
+
+#ifndef UNIV_HOTBACKUP
+/************************************************************//**
+Returns the middle record of the records on the page. If there is an
+even number of records in the list, returns the first record of the
+upper half-list.
+@return	middle record */
+UNIV_INLINE
+rec_t*
+page_get_middle_rec(
+/*================*/
+	page_t*	page)	/*!< in: page */
+	__attribute__((nonnull, warn_unused_result));
+/*************************************************************//**
+Compares a data tuple to a physical record. Differs from the function
+cmp_dtuple_rec_with_match in the way that the record must reside on an
+index page, and also page infimum and supremum records can be given in
+the parameter rec. These are considered as the negative infinity and
+the positive infinity in the alphabetical order.
+@return 1, 0, -1, if dtuple is greater, equal, less than rec,
+respectively, when only the common first fields are compared */
+UNIV_INLINE
+int
+page_cmp_dtuple_rec_with_match(
+/*===========================*/
+	const dtuple_t*	dtuple,	/*!< in: data tuple */
+	const rec_t*	rec,	/*!< in: physical record on a page; may also
+				be page infimum or supremum, in which case
+				matched-parameter values below are not
+				affected */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint*		matched_fields, /*!< in/out: number of already completely
+				matched fields; when function returns
+				contains the value for current comparison */
+	ulint*		matched_bytes); /*!< in/out: number of already matched
+				bytes within the first field not completely
+				matched; when function returns contains the
+				value for current comparison */
+#endif /* !UNIV_HOTBACKUP */
+/*************************************************************//**
+Gets the page number.
+@return	page number */
+UNIV_INLINE
+ulint
+page_get_page_no(
+/*=============*/
+	const page_t*	page);	/*!< in: page */
+/*************************************************************//**
+Gets the tablespace identifier.
+@return	space id */
+UNIV_INLINE
+ulint
+page_get_space_id(
+/*==============*/
+	const page_t*	page);	/*!< in: page */
+/*************************************************************//**
+Gets the number of user records on page (the infimum and supremum records
+are not user records).
+@return	number of user records */
+UNIV_INLINE
+ulint
+page_get_n_recs(
+/*============*/
+	const page_t*	page);	/*!< in: index page */
+/***************************************************************//**
+Returns the number of records before the given record in chain.
+The number includes infimum and supremum records.
+This is the inverse function of page_rec_get_nth().
+@return	number of records */
+UNIV_INTERN
+ulint
+page_rec_get_n_recs_before(
+/*=======================*/
+	const rec_t*	rec);	/*!< in: the physical record */
+/*************************************************************//**
+Gets the number of records in the heap.
+@return	number of records in the heap */
+UNIV_INLINE
+ulint
+page_dir_get_n_heap(
+/*================*/
+	const page_t*	page);	/*!< in: index page */
+/*************************************************************//**
+Sets the number of records in the heap. */
+UNIV_INLINE
+void
+page_dir_set_n_heap(
+/*================*/
+	page_t*		page,	/*!< in/out: index page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be updated, or NULL.
+				Note that the size of the dense page directory
+				in the compressed page trailer is
+				n_heap * PAGE_ZIP_DIR_SLOT_SIZE. */
+	ulint		n_heap);/*!< in: number of records */
+/*************************************************************//**
+Gets the number of dir slots in directory.
+@return	number of slots */
+UNIV_INLINE
+ulint
+page_dir_get_n_slots(
+/*=================*/
+	const page_t*	page);	/*!< in: index page */
+/*************************************************************//**
+Sets the number of dir slots in directory. */
+UNIV_INLINE
+void
+page_dir_set_n_slots(
+/*=================*/
+	page_t*		page,	/*!< in/out: page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be updated, or NULL */
+	ulint		n_slots);/*!< in: number of slots */
+#ifdef UNIV_DEBUG
+/*************************************************************//**
+Gets pointer to nth directory slot (a macro when UNIV_DEBUG is not defined).
+@return	pointer to dir slot */
+UNIV_INLINE
+page_dir_slot_t*
+page_dir_get_nth_slot(
+/*==================*/
+	const page_t*	page,	/*!< in: index page */
+	ulint		n);	/*!< in: position */
+#else /* UNIV_DEBUG */
+# define page_dir_get_nth_slot(page, n)		\
+	((page) + UNIV_PAGE_SIZE - PAGE_DIR	\
+	 - ((n) + 1) * PAGE_DIR_SLOT_SIZE)
+#endif /* UNIV_DEBUG */
+/**************************************************************//**
+Used to check the consistency of a record on a page.
+@return	TRUE if succeed */
+UNIV_INLINE
+ibool
+page_rec_check(
+/*===========*/
+	const rec_t*	rec);	/*!< in: record */
+/***************************************************************//**
+Gets the record pointed to by a directory slot.
+@return	pointer to record */
+UNIV_INLINE
+const rec_t*
+page_dir_slot_get_rec(
+/*==================*/
+	const page_dir_slot_t*	slot);	/*!< in: directory slot */
+/***************************************************************//**
+This is used to set the record offset in a directory slot. */
+UNIV_INLINE
+void
+page_dir_slot_set_rec(
+/*==================*/
+	page_dir_slot_t* slot,	/*!< in: directory slot */
+	rec_t*		 rec);	/*!< in: record on the page */
+/***************************************************************//**
+Gets the number of records owned by a directory slot.
+@return	number of records */
+UNIV_INLINE
+ulint
+page_dir_slot_get_n_owned(
+/*======================*/
+	const page_dir_slot_t*	slot);	/*!< in: page directory slot */
+/***************************************************************//**
+This is used to set the owned records field of a directory slot. */
+UNIV_INLINE
+void
+page_dir_slot_set_n_owned(
+/*======================*/
+	page_dir_slot_t*slot,	/*!< in/out: directory slot */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	ulint		n);	/*!< in: number of records owned by the slot */
+/************************************************************//**
+Calculates the space reserved for directory slots of a given
+number of records. The exact value is a fraction number
+n * PAGE_DIR_SLOT_SIZE / PAGE_DIR_SLOT_MIN_N_OWNED, and it is
+rounded upwards to an integer. */
+UNIV_INLINE
+ulint
+page_dir_calc_reserved_space(
+/*=========================*/
+	ulint	n_recs);	/*!< in: number of records */
+/***************************************************************//**
+Looks for the directory slot which owns the given record.
+@return	the directory slot number */
+UNIV_INTERN
+ulint
+page_dir_find_owner_slot(
+/*=====================*/
+	const rec_t*	rec);	/*!< in: the physical record */
+/************************************************************//**
+Determine whether the page is in new-style compact format.
+@return nonzero if the page is in compact format, zero if it is in
+old-style format */
+UNIV_INLINE
+ulint
+page_is_comp(
+/*=========*/
+	const page_t*	page);	/*!< in: index page */
+/************************************************************//**
+TRUE if the record is on a page in compact format.
+@return	nonzero if in compact format */
+UNIV_INLINE
+ulint
+page_rec_is_comp(
+/*=============*/
+	const rec_t*	rec);	/*!< in: record */
+/***************************************************************//**
+Returns the heap number of a record.
+@return	heap number */
+UNIV_INLINE
+ulint
+page_rec_get_heap_no(
+/*=================*/
+	const rec_t*	rec);	/*!< in: the physical record */
+/************************************************************//**
+Determine whether the page is a B-tree leaf.
+@return	true if the page is a B-tree leaf (PAGE_LEVEL = 0) */
+UNIV_INLINE
+bool
+page_is_leaf(
+/*=========*/
+	const page_t*	page)	/*!< in: page */
+	__attribute__((nonnull, pure));
+/************************************************************//**
+Determine whether the page is empty.
+@return	true if the page is empty (PAGE_N_RECS = 0) */
+UNIV_INLINE
+bool
+page_is_empty(
+/*==========*/
+	const page_t*	page)	/*!< in: page */
+	__attribute__((nonnull, pure));
+/************************************************************//**
+Determine whether the page contains garbage.
+@return	true if the page contains garbage (PAGE_GARBAGE is not 0) */
+UNIV_INLINE
+bool
+page_has_garbage(
+/*=============*/
+	const page_t*	page)	/*!< in: page */
+	__attribute__((nonnull, pure));
+/************************************************************//**
+Gets the pointer to the next record on the page.
+@return	pointer to next record */
+UNIV_INLINE
+const rec_t*
+page_rec_get_next_low(
+/*==================*/
+	const rec_t*	rec,	/*!< in: pointer to record */
+	ulint		comp);	/*!< in: nonzero=compact page layout */
+/************************************************************//**
+Gets the pointer to the next record on the page.
+@return	pointer to next record */
+UNIV_INLINE
+rec_t*
+page_rec_get_next(
+/*==============*/
+	rec_t*	rec);	/*!< in: pointer to record */
+/************************************************************//**
+Gets the pointer to the next record on the page.
+@return	pointer to next record */
+UNIV_INLINE
+const rec_t*
+page_rec_get_next_const(
+/*====================*/
+	const rec_t*	rec);	/*!< in: pointer to record */
+/************************************************************//**
+Gets the pointer to the next non delete-marked record on the page.
+If all subsequent records are delete-marked, then this function
+will return the supremum record.
+@return	pointer to next non delete-marked record or pointer to supremum */
+UNIV_INLINE
+const rec_t*
+page_rec_get_next_non_del_marked(
+/*=============================*/
+	const rec_t*	rec);	/*!< in: pointer to record */
+/************************************************************//**
+Sets the pointer to the next record on the page. */
+UNIV_INLINE
+void
+page_rec_set_next(
+/*==============*/
+	rec_t*		rec,	/*!< in: pointer to record,
+				must not be page supremum */
+	const rec_t*	next);	/*!< in: pointer to next record,
+				must not be page infimum */
+/************************************************************//**
+Gets the pointer to the previous record.
+@return	pointer to previous record */
+UNIV_INLINE
+const rec_t*
+page_rec_get_prev_const(
+/*====================*/
+	const rec_t*	rec);	/*!< in: pointer to record, must not be page
+				infimum */
+/************************************************************//**
+Gets the pointer to the previous record.
+@return	pointer to previous record */
+UNIV_INLINE
+rec_t*
+page_rec_get_prev(
+/*==============*/
+	rec_t*		rec);	/*!< in: pointer to record,
+				must not be page infimum */
+/************************************************************//**
+TRUE if the record is a user record on the page.
+@return	TRUE if a user record */
+UNIV_INLINE
+ibool
+page_rec_is_user_rec_low(
+/*=====================*/
+	ulint	offset)	/*!< in: record offset on page */
+	__attribute__((const));
+/************************************************************//**
+TRUE if the record is the supremum record on a page.
+@return	TRUE if the supremum record */
+UNIV_INLINE
+ibool
+page_rec_is_supremum_low(
+/*=====================*/
+	ulint	offset)	/*!< in: record offset on page */
+	__attribute__((const));
+/************************************************************//**
+TRUE if the record is the infimum record on a page.
+@return	TRUE if the infimum record */
+UNIV_INLINE
+ibool
+page_rec_is_infimum_low(
+/*====================*/
+	ulint	offset)	/*!< in: record offset on page */
+	__attribute__((const));
+
+/************************************************************//**
+TRUE if the record is a user record on the page.
+@return	TRUE if a user record */
+UNIV_INLINE
+ibool
+page_rec_is_user_rec(
+/*=================*/
+	const rec_t*	rec)	/*!< in: record */
+	__attribute__((const));
+/************************************************************//**
+TRUE if the record is the supremum record on a page.
+@return	TRUE if the supremum record */
+UNIV_INLINE
+ibool
+page_rec_is_supremum(
+/*=================*/
+	const rec_t*	rec)	/*!< in: record */
+	__attribute__((const));
+
+/************************************************************//**
+TRUE if the record is the infimum record on a page.
+@return	TRUE if the infimum record */
+UNIV_INLINE
+ibool
+page_rec_is_infimum(
+/*================*/
+	const rec_t*	rec)	/*!< in: record */
+	__attribute__((const));
+/***************************************************************//**
+Looks for the record which owns the given record.
+@return	the owner record */
+UNIV_INLINE
+rec_t*
+page_rec_find_owner_rec(
+/*====================*/
+	rec_t*	rec);	/*!< in: the physical record */
+#ifndef UNIV_HOTBACKUP
+/***********************************************************************//**
+Write a 32-bit field in a data dictionary record. */
+UNIV_INLINE
+void
+page_rec_write_field(
+/*=================*/
+	rec_t*	rec,	/*!< in/out: record to update */
+	ulint	i,	/*!< in: index of the field to update */
+	ulint	val,	/*!< in: value to write */
+	mtr_t*	mtr)	/*!< in/out: mini-transaction */
+	__attribute__((nonnull));
+#endif /* !UNIV_HOTBACKUP */
+/************************************************************//**
+Returns the maximum combined size of records which can be inserted on top
+of record heap.
+@return	maximum combined size for inserted records */
+UNIV_INLINE
+ulint
+page_get_max_insert_size(
+/*=====================*/
+	const page_t*	page,	/*!< in: index page */
+	ulint		n_recs);/*!< in: number of records */
+/************************************************************//**
+Returns the maximum combined size of records which can be inserted on top
+of record heap if page is first reorganized.
+@return	maximum combined size for inserted records */
+UNIV_INLINE
+ulint
+page_get_max_insert_size_after_reorganize(
+/*======================================*/
+	const page_t*	page,	/*!< in: index page */
+	ulint		n_recs);/*!< in: number of records */
+/*************************************************************//**
+Calculates free space if a page is emptied.
+@return	free space */
+UNIV_INLINE
+ulint
+page_get_free_space_of_empty(
+/*=========================*/
+	ulint	comp)	/*!< in: nonzero=compact page format */
+		__attribute__((const));
+/**********************************************************//**
+Returns the base extra size of a physical record.  This is the
+size of the fixed header, independent of the record size.
+@return	REC_N_NEW_EXTRA_BYTES or REC_N_OLD_EXTRA_BYTES */
+UNIV_INLINE
+ulint
+page_rec_get_base_extra_size(
+/*=========================*/
+	const rec_t*	rec);	/*!< in: physical record */
+/************************************************************//**
+Returns the sum of the sizes of the records in the record list
+excluding the infimum and supremum records.
+@return	data in bytes */
+UNIV_INLINE
+ulint
+page_get_data_size(
+/*===============*/
+	const page_t*	page);	/*!< in: index page */
+/************************************************************//**
+Allocates a block of memory from the head of the free list
+of an index page. */
+UNIV_INLINE
+void
+page_mem_alloc_free(
+/*================*/
+	page_t*		page,	/*!< in/out: index page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page with enough
+				space available for inserting the record,
+				or NULL */
+	rec_t*		next_rec,/*!< in: pointer to the new head of the
+				free record list */
+	ulint		need);	/*!< in: number of bytes allocated */
+/************************************************************//**
+Allocates a block of memory from the heap of an index page.
+@return	pointer to start of allocated buffer, or NULL if allocation fails */
+UNIV_INTERN
+byte*
+page_mem_alloc_heap(
+/*================*/
+	page_t*		page,	/*!< in/out: index page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page with enough
+				space available for inserting the record,
+				or NULL */
+	ulint		need,	/*!< in: total number of bytes needed */
+	ulint*		heap_no);/*!< out: this contains the heap number
+				of the allocated record
+				if allocation succeeds */
+/************************************************************//**
+Puts a record to free list. */
+UNIV_INLINE
+void
+page_mem_free(
+/*==========*/
+	page_t*			page,	/*!< in/out: index page */
+	page_zip_des_t*		page_zip,/*!< in/out: compressed page,
+					 or NULL */
+	rec_t*			rec,	/*!< in: pointer to the (origin of)
+					record */
+	const dict_index_t*	index,	/*!< in: index of rec */
+	const ulint*		offsets);/*!< in: array returned by
+					 rec_get_offsets() */
+/**********************************************************//**
+Create an uncompressed B-tree index page.
+@return	pointer to the page */
+UNIV_INTERN
+page_t*
+page_create(
+/*========*/
+	buf_block_t*	block,		/*!< in: a buffer block where the
+					page is created */
+	mtr_t*		mtr,		/*!< in: mini-transaction handle */
+	ulint		comp);		/*!< in: nonzero=compact page format */
+/**********************************************************//**
+Create a compressed B-tree index page.
+@return	pointer to the page */
+UNIV_INTERN
+page_t*
+page_create_zip(
+/*============*/
+	buf_block_t*	block,		/*!< in/out: a buffer block where the
+					page is created */
+	dict_index_t*	index,		/*!< in: the index of the page */
+	ulint		level,		/*!< in: the B-tree level of the page */
+	trx_id_t	max_trx_id,	/*!< in: PAGE_MAX_TRX_ID */
+	mtr_t*		mtr)		/*!< in/out: mini-transaction */
+	__attribute__((nonnull));
+/**********************************************************//**
+Empty a previously created B-tree index page. */
+UNIV_INTERN
+void
+page_create_empty(
+/*==============*/
+	buf_block_t*	block,	/*!< in/out: B-tree block */
+	dict_index_t*	index,	/*!< in: the index of the page */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+	__attribute__((nonnull(1,2)));
+/*************************************************************//**
+Differs from page_copy_rec_list_end, because this function does not
+touch the lock table and max trx id on page or compress the page.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if new_block is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit(). */
+UNIV_INTERN
+void
+page_copy_rec_list_end_no_locks(
+/*============================*/
+	buf_block_t*	new_block,	/*!< in: index page to copy to */
+	buf_block_t*	block,		/*!< in: index page of rec */
+	rec_t*		rec,		/*!< in: record on page */
+	dict_index_t*	index,		/*!< in: record descriptor */
+	mtr_t*		mtr);		/*!< in: mtr */
+/*************************************************************//**
+Copies records from page to new_page, from the given record onward,
+including that record. Infimum and supremum records are not copied.
+The records are copied to the start of the record list on new_page.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if new_block is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
+@return pointer to the original successor of the infimum record on
+new_page, or NULL on zip overflow (new_block will be decompressed) */
+UNIV_INTERN
+rec_t*
+page_copy_rec_list_end(
+/*===================*/
+	buf_block_t*	new_block,	/*!< in/out: index page to copy to */
+	buf_block_t*	block,		/*!< in: index page containing rec */
+	rec_t*		rec,		/*!< in: record on page */
+	dict_index_t*	index,		/*!< in: record descriptor */
+	mtr_t*		mtr)		/*!< in: mtr */
+	__attribute__((nonnull));
+/*************************************************************//**
+Copies records from page to new_page, up to the given record, NOT
+including that record. Infimum and supremum records are not copied.
+The records are copied to the end of the record list on new_page.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if new_block is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
+@return pointer to the original predecessor of the supremum record on
+new_page, or NULL on zip overflow (new_block will be decompressed) */
+UNIV_INTERN
+rec_t*
+page_copy_rec_list_start(
+/*=====================*/
+	buf_block_t*	new_block,	/*!< in/out: index page to copy to */
+	buf_block_t*	block,		/*!< in: index page containing rec */
+	rec_t*		rec,		/*!< in: record on page */
+	dict_index_t*	index,		/*!< in: record descriptor */
+	mtr_t*		mtr)		/*!< in: mtr */
+	__attribute__((nonnull));
+/*************************************************************//**
+Deletes records from a page from a given record onward, including that record.
+The infimum and supremum records are not deleted. */
+UNIV_INTERN
+void
+page_delete_rec_list_end(
+/*=====================*/
+	rec_t*		rec,	/*!< in: pointer to record on page */
+	buf_block_t*	block,	/*!< in: buffer block of the page */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	ulint		n_recs,	/*!< in: number of records to delete,
+				or ULINT_UNDEFINED if not known */
+	ulint		size,	/*!< in: the sum of the sizes of the
+				records in the end of the chain to
+				delete, or ULINT_UNDEFINED if not known */
+	mtr_t*		mtr)	/*!< in: mtr */
+	__attribute__((nonnull));
+/*************************************************************//**
+Deletes records from page, up to the given record, NOT including
+that record. Infimum and supremum records are not deleted. */
+UNIV_INTERN
+void
+page_delete_rec_list_start(
+/*=======================*/
+	rec_t*		rec,	/*!< in: record on page */
+	buf_block_t*	block,	/*!< in: buffer block of the page */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	mtr_t*		mtr)	/*!< in: mtr */
+	__attribute__((nonnull));
+/*************************************************************//**
+Moves record list end to another page. Moved records include
+split_rec.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if new_block is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
+@return TRUE on success; FALSE on compression failure (new_block will
+be decompressed) */
+UNIV_INTERN
+ibool
+page_move_rec_list_end(
+/*===================*/
+	buf_block_t*	new_block,	/*!< in/out: index page where to move */
+	buf_block_t*	block,		/*!< in: index page from where to move */
+	rec_t*		split_rec,	/*!< in: first record to move */
+	dict_index_t*	index,		/*!< in: record descriptor */
+	mtr_t*		mtr)		/*!< in: mtr */
+	__attribute__((nonnull(1, 2, 4, 5)));
+/*************************************************************//**
+Moves record list start to another page. Moved records do not include
+split_rec.
+
+IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
+if new_block is a compressed leaf page in a secondary index.
+This has to be done either within the same mini-transaction,
+or by invoking ibuf_reset_free_bits() before mtr_commit().
+
+@return	TRUE on success; FALSE on compression failure */
+UNIV_INTERN
+ibool
+page_move_rec_list_start(
+/*=====================*/
+	buf_block_t*	new_block,	/*!< in/out: index page where to move */
+	buf_block_t*	block,		/*!< in/out: page containing split_rec */
+	rec_t*		split_rec,	/*!< in: first record not to move */
+	dict_index_t*	index,		/*!< in: record descriptor */
+	mtr_t*		mtr)		/*!< in: mtr */
+	__attribute__((nonnull(1, 2, 4, 5)));
+/****************************************************************//**
+Splits a directory slot which owns too many records. */
+UNIV_INTERN
+void
+page_dir_split_slot(
+/*================*/
+	page_t*		page,	/*!< in: index page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be written, or NULL */
+	ulint		slot_no)/*!< in: the directory slot */
+	__attribute__((nonnull(1)));
+/*************************************************************//**
+Tries to balance the given directory slot with too few records
+with the upper neighbor, so that there are at least the minimum number
+of records owned by the slot; this may result in the merging of
+two slots. */
+UNIV_INTERN
+void
+page_dir_balance_slot(
+/*==================*/
+	page_t*		page,	/*!< in/out: index page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	ulint		slot_no)/*!< in: the directory slot */
+	__attribute__((nonnull(1)));
+/**********************************************************//**
+Parses a log record of a record list end or start deletion.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+page_parse_delete_rec_list(
+/*=======================*/
+	byte		type,	/*!< in: MLOG_LIST_END_DELETE,
+				MLOG_LIST_START_DELETE,
+				MLOG_COMP_LIST_END_DELETE or
+				MLOG_COMP_LIST_START_DELETE */
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	buf_block_t*	block,	/*!< in/out: buffer block or NULL */
+	dict_index_t*	index,	/*!< in: record descriptor */
+	mtr_t*		mtr);	/*!< in: mtr or NULL */
+/***********************************************************//**
+Parses a redo log record of creating a page.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+page_parse_create(
+/*==============*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	ulint		comp,	/*!< in: nonzero=compact page format */
+	buf_block_t*	block,	/*!< in: block or NULL */
+	mtr_t*		mtr);	/*!< in: mtr or NULL */
+#ifndef UNIV_HOTBACKUP
+/************************************************************//**
+Prints record contents including the data relevant only in
+the index page context. */
+UNIV_INTERN
+void
+page_rec_print(
+/*===========*/
+	const rec_t*	rec,	/*!< in: physical record */
+	const ulint*	offsets);/*!< in: record descriptor */
+# ifdef UNIV_BTR_PRINT
+/***************************************************************//**
+This is used to print the contents of the directory for
+debugging purposes. */
+UNIV_INTERN
+void
+page_dir_print(
+/*===========*/
+	page_t*	page,	/*!< in: index page */
+	ulint	pr_n);	/*!< in: print n first and n last entries */
+/***************************************************************//**
+This is used to print the contents of the page record list for
+debugging purposes. */
+UNIV_INTERN
+void
+page_print_list(
+/*============*/
+	buf_block_t*	block,	/*!< in: index page */
+	dict_index_t*	index,	/*!< in: dictionary index of the page */
+	ulint		pr_n);	/*!< in: print n first and n last entries */
+/***************************************************************//**
+Prints the info in a page header. */
+UNIV_INTERN
+void
+page_header_print(
+/*==============*/
+	const page_t*	page);	/*!< in: index page */
+/***************************************************************//**
+This is used to print the contents of the page for
+debugging purposes. */
+UNIV_INTERN
+void
+page_print(
+/*=======*/
+	buf_block_t*	block,	/*!< in: buffer block of the index page */
+	dict_index_t*	index,	/*!< in: dictionary index of the page */
+	ulint		dn,	/*!< in: print dn first and last entries
+				in directory */
+	ulint		rn);	/*!< in: print rn first and last records
+				in record list */
+# endif /* UNIV_BTR_PRINT */
+#endif /* !UNIV_HOTBACKUP */
+/***************************************************************//**
+The following is used to validate a record on a page. This function
+differs from rec_validate as it can also check the n_owned field and
+the heap_no field.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+page_rec_validate(
+/*==============*/
+	const rec_t*	rec,	/*!< in: physical record */
+	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
+/***************************************************************//**
+Checks that the first directory slot points to the infimum record and
+the last to the supremum. This function is intended to track if the
+bug fixed in 4.0.14 has caused corruption to users' databases. */
+UNIV_INTERN
+void
+page_check_dir(
+/*===========*/
+	const page_t*	page);	/*!< in: index page */
+/***************************************************************//**
+This function checks the consistency of an index page when we do not
+know the index. This is also resilient so that this should never crash
+even if the page is total garbage.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+page_simple_validate_old(
+/*=====================*/
+	const page_t*	page);	/*!< in: index page in ROW_FORMAT=REDUNDANT */
+/***************************************************************//**
+This function checks the consistency of an index page when we do not
+know the index. This is also resilient so that this should never crash
+even if the page is total garbage.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+page_simple_validate_new(
+/*=====================*/
+	const page_t*	page);	/*!< in: index page in ROW_FORMAT!=REDUNDANT */
+/***************************************************************//**
+This function checks the consistency of an index page.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+page_validate(
+/*==========*/
+	const page_t*	page,	/*!< in: index page */
+	dict_index_t*	index);	/*!< in: data dictionary index containing
+				the page record type definition */
+/***************************************************************//**
+Looks in the page record list for a record with the given heap number.
+@return	record, NULL if not found */
+
+const rec_t*
+page_find_rec_with_heap_no(
+/*=======================*/
+	const page_t*	page,	/*!< in: index page */
+	ulint		heap_no);/*!< in: heap number */
+/** Get the last non-delete-marked record on a page.
+@param[in]	page	index tree leaf page
+@return the last record, not delete-marked
+@retval infimum record if all records are delete-marked */
+
+const rec_t*
+page_find_rec_max_not_deleted(
+	const page_t*	page);
+#ifdef UNIV_MATERIALIZE
+#undef UNIV_INLINE
+#define UNIV_INLINE  UNIV_INLINE_ORIGINAL
+#endif
+
+#ifndef UNIV_NONINL
+#include "page0page.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/page0page.ic b/storage/innobase/include/page0page.ic
new file mode 100644
index 00000000000..9b81156708f
--- /dev/null
+++ b/storage/innobase/include/page0page.ic
@@ -0,0 +1,1176 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/page0page.ic
+Index page routines
+
+Created 2/2/1994 Heikki Tuuri
+*******************************************************/
+
+#include "mach0data.h"
+#ifdef UNIV_DEBUG
+# include "log0recv.h"
+#endif /* UNIV_DEBUG */
+#ifndef UNIV_HOTBACKUP
+# include "rem0cmp.h"
+#endif /* !UNIV_HOTBACKUP */
+#include "mtr0log.h"
+#include "page0zip.h"
+
+#ifdef UNIV_MATERIALIZE
+#undef UNIV_INLINE
+#define UNIV_INLINE
+#endif
+
+/************************************************************//**
+Gets the start of the page containing ptr, via ut_align_down().
+@return	start of the page */
+UNIV_INLINE
+page_t*
+page_align(
+/*=======*/
+	const void*	ptr)	/*!< in: pointer within a page frame */
+{
+	return((page_t*) ut_align_down(ptr, UNIV_PAGE_SIZE));
+}
+/************************************************************//**
+Gets the offset of ptr within its page, via ut_align_offset().
+@return	offset from the start of the page */
+UNIV_INLINE
+ulint
+page_offset(
+/*========*/
+	const void*	ptr)	/*!< in: pointer within a page frame */
+{
+	return(ut_align_offset(ptr, UNIV_PAGE_SIZE));
+}
+/*************************************************************//**
+Returns the 8-byte PAGE_MAX_TRX_ID field read from the page header. */
+UNIV_INLINE
+trx_id_t
+page_get_max_trx_id(
+/*================*/
+	const page_t*	page)	/*!< in: page; asserted non-NULL by ut_ad() */
+{
+	ut_ad(page);
+
+	return(mach_read_from_8(page + PAGE_HEADER + PAGE_MAX_TRX_ID));
+}
+
+/*************************************************************//**
+Raises PAGE_MAX_TRX_ID on the page to trx_id; the stored value is left
+untouched unless trx_id is strictly greater than it. */
+UNIV_INLINE
+void
+page_update_max_trx_id(
+/*===================*/
+	buf_block_t*	block,	/*!< in/out: buffer block of the page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be updated, or NULL */
+	trx_id_t	trx_id,	/*!< in: transaction id */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+{
+	ut_ad(block);
+	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	/* A zero trx_id is tolerated only while crash recovery is
+	running: redo log application may invoke this function on pages
+	other than leaf pages of a secondary index or of the insert
+	buffer tree (dict_index_is_sec_or_ibuf() holds for the dummy
+	indexes built during recovery).  PAGE_MAX_TRX_ID is unused on
+	such pages, and trx_id is usually zero there. */
+	ut_ad(trx_id || recv_recovery_is_on());
+	ut_ad(page_is_leaf(buf_block_get_frame(block)));
+
+	if (page_get_max_trx_id(buf_block_get_frame(block)) >= trx_id) {
+		return;
+	}
+	page_set_max_trx_id(block, page_zip, trx_id, mtr);
+}
+
+/*************************************************************//**
+Reads the given 2-byte header field at page + PAGE_HEADER + field. */
+UNIV_INLINE
+ulint
+page_header_get_field(
+/*==================*/
+	const page_t*	page,	/*!< in: page */
+	ulint		field)	/*!< in: PAGE_LEVEL, ...; at most PAGE_INDEX_ID */
+{
+	ut_ad(page);
+	ut_ad(field <= PAGE_INDEX_ID);
+
+	return(mach_read_from_2(page + PAGE_HEADER + field));
+}
+
+/*************************************************************//**
+Stores a 2-byte header field; page_zip, if given, is passed the same bytes. */
+UNIV_INLINE
+void
+page_header_set_field(
+/*==================*/
+	page_t*		page,	/*!< in/out: page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be updated, or NULL */
+	ulint		field,	/*!< in: PAGE_N_DIR_SLOTS, ... */
+	ulint		val)	/*!< in: value */
+{
+	ut_ad(page);
+	ut_ad(field <= PAGE_N_RECS);
+	ut_ad(field == PAGE_N_HEAP || val < UNIV_PAGE_SIZE);
+	ut_ad(field != PAGE_N_HEAP || (val & 0x7fff) < UNIV_PAGE_SIZE);
+
+	page_t*	dest = page + PAGE_HEADER + field;
+	mach_write_to_2(dest, val);
+	if (page_zip != NULL) {
+		page_zip_write_header(page_zip, dest, 2, NULL);
+	}
+}
+
+/*************************************************************//**
+Returns the offset stored in the given header field.
+@return	offset from the start of the page, or 0 (asserted nonzero for PAGE_HEAP_TOP) */
+UNIV_INLINE
+ulint
+page_header_get_offs(
+/*=================*/
+	const page_t*	page,	/*!< in: page */
+	ulint		field)	/*!< in: PAGE_FREE, PAGE_LAST_INSERT or PAGE_HEAP_TOP */
+{
+	ulint	offs;
+
+	ut_ad(page);
+	ut_ad((field == PAGE_FREE)
+	      || (field == PAGE_LAST_INSERT)
+	      || (field == PAGE_HEAP_TOP));
+
+	offs = page_header_get_field(page, field);
+
+	ut_ad((field != PAGE_HEAP_TOP) || offs);
+
+	return(offs);
+}
+
+/*************************************************************//**
+Stores a pointer into the given header field as an offset within the page. */
+UNIV_INLINE
+void
+page_header_set_ptr(
+/*================*/
+	page_t*		page,	/*!< in/out: page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be updated, or NULL */
+	ulint		field,	/*!< in: PAGE_FREE, ... */
+	const byte*	ptr)	/*!< in: pointer or NULL */
+{
+	ulint	offs;
+
+	ut_ad(page);
+	ut_ad((field == PAGE_FREE)
+	      || (field == PAGE_LAST_INSERT)
+	      || (field == PAGE_HEAP_TOP));
+
+	/* A NULL pointer is stored as offset 0; any other pointer is
+	stored as its byte distance from the start of the page. */
+	offs = (ptr != NULL)
+		? ptr - page
+		: 0;
+
+	ut_ad((field != PAGE_HEAP_TOP) || offs);
+
+	page_header_set_field(page, page_zip, field, offs);
+}
+
+#ifndef UNIV_HOTBACKUP
+/*************************************************************//**
+Zeroes the PAGE_LAST_INSERT field in the page header. The mtr is handed
+to the write routine that is used, so the change goes through it. */
+UNIV_INLINE
+void
+page_header_reset_last_insert(
+/*==========================*/
+	page_t*		page,	/*!< in/out: page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be updated, or NULL */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	ut_ad(page && mtr);
+
+	page_t*	last_insert = page + (PAGE_HEADER + PAGE_LAST_INSERT);
+
+	if (!page_zip) {
+		/* No compressed copy: one mlog_write_ulint() call. */
+		mlog_write_ulint(last_insert, 0, MLOG_2BYTES, mtr);
+		return;
+	}
+	mach_write_to_2(last_insert, 0);
+	page_zip_write_header(page_zip, last_insert, 2, mtr);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/************************************************************//**
+Determine whether the page is in new-style compact format.
+@return nonzero (the 0x8000 bit of PAGE_N_HEAP) if the page is in
+compact format, zero if it is in old-style format */
+UNIV_INLINE
+ulint
+page_is_comp(
+/*=========*/
+	const page_t*	page)	/*!< in: index page */
+{
+	return(page_header_get_field(page, PAGE_N_HEAP) & 0x8000);
+}
+
+/************************************************************//**
+Checks whether the page containing the record is in compact format.
+@return	nonzero if in compact format; the page_is_comp() value */
+UNIV_INLINE
+ulint
+page_rec_is_comp(
+/*=============*/
+	const rec_t*	rec)	/*!< in: record; its page is found with page_align() */
+{
+	return(page_is_comp(page_align(rec)));
+}
+
+/***************************************************************//**
+Reads the heap number of a record, whichever format its page is in.
+@return	heap number */
+UNIV_INLINE
+ulint
+page_rec_get_heap_no(
+/*=================*/
+	const rec_t*	rec)	/*!< in: the physical record */
+{
+	/* Compact and old-style records have separate accessors for
+	the heap number; select the one matching the page format. */
+	return(page_rec_is_comp(rec)
+	       ? rec_get_heap_no_new(rec)
+	       : rec_get_heap_no_old(rec));
+}
+
+/************************************************************//**
+Determine whether the page is a B-tree leaf (zero test of the raw 16 bits).
+@return	true if the page is a B-tree leaf (PAGE_LEVEL = 0) */
+UNIV_INLINE
+bool
+page_is_leaf(
+/*=========*/
+	const page_t*	page)	/*!< in: page; PAGE_LEVEL is read from its header */
+{
+	return(!*(const uint16*) (page + (PAGE_HEADER + PAGE_LEVEL)));
+}
+
+/************************************************************//**
+Determine whether the page is empty (zero test of the raw 16 bits).
+@return	true if the page is empty (PAGE_N_RECS = 0) */
+UNIV_INLINE
+bool
+page_is_empty(
+/*==========*/
+	const page_t*	page)	/*!< in: page; PAGE_N_RECS is read from its header */
+{
+	return(!*(const uint16*) (page + (PAGE_HEADER + PAGE_N_RECS)));
+}
+
+/************************************************************//**
+Determine whether the page contains garbage (raw 16 bits are nonzero).
+@return	true if the page contains garbage (PAGE_GARBAGE is not 0) */
+UNIV_INLINE
+bool
+page_has_garbage(
+/*=============*/
+	const page_t*	page)	/*!< in: page; PAGE_GARBAGE is read from its header */
+{
+	return(!!*(const uint16*) (page + (PAGE_HEADER + PAGE_GARBAGE)));
+}
+
+/************************************************************//**
+Gets the offset of the infimum record, the first record on the page.
+@return	PAGE_NEW_INFIMUM or PAGE_OLD_INFIMUM, relative from page */
+UNIV_INLINE
+ulint
+page_get_infimum_offset(
+/*====================*/
+	const page_t*	page)	/*!< in: start of a page which must have record(s) */
+{
+	ut_ad(page);
+	ut_ad(!page_offset(page));
+
+	/* The offset is a constant chosen by the format of the page:
+	compact pages use the NEW value, all others the OLD value. */
+	return(page_is_comp(page)
+	       ? PAGE_NEW_INFIMUM
+	       : PAGE_OLD_INFIMUM);
+}
+
+/************************************************************//**
+Gets the offset of the supremum record, the last record on the page.
+@return	PAGE_NEW_SUPREMUM or PAGE_OLD_SUPREMUM, relative from page */
+UNIV_INLINE
+ulint
+page_get_supremum_offset(
+/*=====================*/
+	const page_t*	page)	/*!< in: start of a page which must have record(s) */
+{
+	ut_ad(page);
+	ut_ad(!page_offset(page));
+
+	/* The offset is a constant chosen by the format of the page:
+	compact pages use the NEW value, all others the OLD value. */
+	return(page_is_comp(page)
+	       ? PAGE_NEW_SUPREMUM
+	       : PAGE_OLD_SUPREMUM);
+}
+
+/************************************************************//**
+Checks that an offset is none of the infimum or supremum record offsets.
+@return	TRUE if a user record (neither infimum nor supremum offset) */
+UNIV_INLINE
+ibool
+page_rec_is_user_rec_low(
+/*=====================*/
+	ulint	offset)	/*!< in: record offset on page */
+{
+	ut_ad(offset >= PAGE_NEW_INFIMUM);
+#if PAGE_OLD_INFIMUM < PAGE_NEW_INFIMUM
+# error "PAGE_OLD_INFIMUM < PAGE_NEW_INFIMUM"
+#endif
+#if PAGE_OLD_SUPREMUM < PAGE_NEW_SUPREMUM
+# error "PAGE_OLD_SUPREMUM < PAGE_NEW_SUPREMUM"
+#endif
+#if PAGE_NEW_INFIMUM > PAGE_OLD_SUPREMUM
+# error "PAGE_NEW_INFIMUM > PAGE_OLD_SUPREMUM"
+#endif
+#if PAGE_OLD_INFIMUM > PAGE_NEW_SUPREMUM
+# error "PAGE_OLD_INFIMUM > PAGE_NEW_SUPREMUM"
+#endif
+#if PAGE_NEW_SUPREMUM > PAGE_OLD_SUPREMUM_END
+# error "PAGE_NEW_SUPREMUM > PAGE_OLD_SUPREMUM_END"
+#endif
+#if PAGE_OLD_SUPREMUM > PAGE_NEW_SUPREMUM_END
+# error "PAGE_OLD_SUPREMUM > PAGE_NEW_SUPREMUM_END"
+#endif
+	ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
+
+	return(!(offset == PAGE_NEW_SUPREMUM
+		 || offset == PAGE_NEW_INFIMUM
+		 || offset == PAGE_OLD_INFIMUM
+		 || offset == PAGE_OLD_SUPREMUM));
+}
+
+/************************************************************//**
+TRUE if the offset equals PAGE_NEW_SUPREMUM or PAGE_OLD_SUPREMUM.
+@return	TRUE if the supremum record */
+UNIV_INLINE
+ibool
+page_rec_is_supremum_low(
+/*=====================*/
+	ulint	offset)	/*!< in: record offset on page; range-checked by ut_ad() */
+{
+	ut_ad(offset >= PAGE_NEW_INFIMUM);
+	ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
+
+	return(offset == PAGE_NEW_SUPREMUM
+	       || offset == PAGE_OLD_SUPREMUM);
+}
+
+/************************************************************//**
+TRUE if the offset equals PAGE_NEW_INFIMUM or PAGE_OLD_INFIMUM.
+@return	TRUE if the infimum record */
+UNIV_INLINE
+ibool
+page_rec_is_infimum_low(
+/*====================*/
+	ulint	offset)	/*!< in: record offset on page; range-checked by ut_ad() */
+{
+	ut_ad(offset >= PAGE_NEW_INFIMUM);
+	ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
+
+	return(offset == PAGE_NEW_INFIMUM || offset == PAGE_OLD_INFIMUM);
+}
+
+/************************************************************//**
+TRUE if the record is a user record, judged by its offset on the page.
+@return	TRUE if a user record; the page_rec_is_user_rec_low() value */
+UNIV_INLINE
+ibool
+page_rec_is_user_rec(
+/*=================*/
+	const rec_t*	rec)	/*!< in: record; page_rec_check() is asserted */
+{
+	ut_ad(page_rec_check(rec));
+
+	return(page_rec_is_user_rec_low(page_offset(rec)));
+}
+
+/************************************************************//**
+Tests whether a record is the supremum record of its page.
+@return	TRUE if the supremum record */
+UNIV_INLINE
+ibool
+page_rec_is_supremum(
+/*=================*/
+	const rec_t*	rec)	/*!< in: record */
+{
+	const ulint	offs = page_offset(rec);
+	ut_ad(page_rec_check(rec));
+	return(page_rec_is_supremum_low(offs));
+}
+
+/************************************************************//**
+Tests whether a record is the infimum record of its page.
+@return	TRUE if the infimum record */
+UNIV_INLINE
+ibool
+page_rec_is_infimum(
+/*================*/
+	const rec_t*	rec)	/*!< in: record */
+{
+	const ulint	offs = page_offset(rec);
+	ut_ad(page_rec_check(rec));
+	return(page_rec_is_infimum_low(offs));
+}
+
+/************************************************************//**
+Returns the nth record of the record list, as a non-const pointer.
+This is the inverse function of page_rec_get_n_recs_before().
+@return	nth record: the result of page_rec_get_nth_const(), cast */
+UNIV_INLINE
+rec_t*
+page_rec_get_nth(
+/*=============*/
+	page_t*	page,	/*!< in: page */
+	ulint	nth)	/*!< in: nth record */
+{
+	return((rec_t*) page_rec_get_nth_const(page, nth));
+}
+
+#ifndef UNIV_HOTBACKUP
+/************************************************************//**
+Finds the record in the middle of the record list of the page. With an
+even number of records in the list, the first record of the upper
+half-list is chosen.
+@return	middle record */
+UNIV_INLINE
+rec_t*
+page_get_middle_rec(
+/*================*/
+	page_t*	page)	/*!< in: page */
+{
+	/* half of n is the position handed to page_rec_get_nth() */
+	const ulint	n = page_get_n_recs(page) + PAGE_HEAP_NO_USER_LOW;
+	return(page_rec_get_nth(page, n / 2));
+}
+
+/*************************************************************//**
+Compares a data tuple to a physical record. Unlike
+cmp_dtuple_rec_with_match(), this function requires the record to reside
+on an index page, and rec may also be the page infimum or supremum
+record. Those two are treated as the negative and the positive infinity
+of the ordering, respectively.
+@return 1, 0, -1, if dtuple is greater, equal, less than rec,
+respectively, when only the common first fields are compared */
+UNIV_INLINE
+int
+page_cmp_dtuple_rec_with_match(
+/*===========================*/
+	const dtuple_t*	dtuple,	/*!< in: data tuple */
+	const rec_t*	rec,	/*!< in: physical record on a page; may also
+				be page infimum or supremum, in which case
+				matched-parameter values below are not
+				affected */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint*		matched_fields, /*!< in/out: number of already completely
+				matched fields; when function returns
+				contains the value for current comparison */
+	ulint*		matched_bytes) /*!< in/out: number of already matched
+				bytes within the first field not completely
+				matched; when function returns contains the
+				value for current comparison */
+{
+	ulint	pos;
+
+	ut_ad(dtuple_check_typed(dtuple));
+	ut_ad(rec_offs_validate(rec, NULL, offsets));
+	ut_ad(!rec_offs_comp(offsets) == !page_rec_is_comp(rec));
+
+	pos = page_offset(rec);
+
+	/* The infimum is smaller than any tuple. */
+	if (pos == PAGE_NEW_INFIMUM || pos == PAGE_OLD_INFIMUM) {
+		return(1);
+	}
+
+	/* The supremum is greater than any tuple. */
+	if (pos == PAGE_NEW_SUPREMUM || pos == PAGE_OLD_SUPREMUM) {
+		return(-1);
+	}
+
+	/* Any other record is compared for real; only this path hands
+	the matched_* arguments on. */
+	return(cmp_dtuple_rec_with_match(dtuple, rec, offsets,
+					 matched_fields, matched_bytes));
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/*************************************************************//**
+Gets the page number from the FIL_PAGE_OFFSET field of the page.
+@return	page number */
+UNIV_INLINE
+ulint
+page_get_page_no(
+/*=============*/
+	const page_t*	page)	/*!< in: page-aligned start of the page */
+{
+	ut_ad(page == page_align((page_t*) page));
+	return(mach_read_from_4(page + FIL_PAGE_OFFSET));
+}
+
+/*************************************************************//**
+Gets the tablespace identifier from FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID.
+@return	space id */
+UNIV_INLINE
+ulint
+page_get_space_id(
+/*==============*/
+	const page_t*	page)	/*!< in: page-aligned start of the page */
+{
+	ut_ad(page == page_align((page_t*) page));
+	return(mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
+}
+
+/*************************************************************//**
+Gets the number of user records on page, as stored in the PAGE_N_RECS
+header field (infimum and supremum records are not user records).
+@return	number of user records */
+UNIV_INLINE
+ulint
+page_get_n_recs(
+/*============*/
+	const page_t*	page)	/*!< in: index page */
+{
+	return(page_header_get_field(page, PAGE_N_RECS));
+}
+
+/*************************************************************//**
+Gets the number of dir slots in directory (header field PAGE_N_DIR_SLOTS).
+@return	number of slots */
+UNIV_INLINE
+ulint
+page_dir_get_n_slots(
+/*=================*/
+	const page_t*	page)	/*!< in: index page */
+{
+	return(page_header_get_field(page, PAGE_N_DIR_SLOTS));
+}
+/*************************************************************//**
+Sets the number of dir slots in directory (header field PAGE_N_DIR_SLOTS). */
+UNIV_INLINE
+void
+page_dir_set_n_slots(
+/*=================*/
+	page_t*		page,	/*!< in/out: page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be updated, or NULL */
+	ulint		n_slots)/*!< in: number of slots */
+{
+	page_header_set_field(page, page_zip, PAGE_N_DIR_SLOTS, n_slots);
+}
+
+/*************************************************************//**
+Gets the number of records in the heap (PAGE_N_HEAP without bit 0x8000).
+@return	number of heap records, which is not the number of user records */
+UNIV_INLINE
+ulint
+page_dir_get_n_heap(
+/*================*/
+	const page_t*	page)	/*!< in: index page */
+{
+	return(page_header_get_field(page, PAGE_N_HEAP) & 0x7fff);
+}
+
+/*************************************************************//**
+Sets the number of heap records; bit 0x8000 of PAGE_N_HEAP is preserved. */
+UNIV_INLINE
+void
+page_dir_set_n_heap(
+/*================*/
+	page_t*		page,	/*!< in/out: index page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be updated, or NULL.
+				Note that the size of the dense page directory
+				in the compressed page trailer is
+				n_heap * PAGE_ZIP_DIR_SLOT_SIZE. */
+	ulint		n_heap)	/*!< in: number of records */
+{
+	ulint	flag;
+
+	ut_ad(n_heap < 0x8000);
+	ut_ad(!page_zip || n_heap
+	      == (page_header_get_field(page, PAGE_N_HEAP) & 0x7fff) + 1);
+	flag = page_header_get_field(page, PAGE_N_HEAP) & 0x8000;
+	page_header_set_field(page, page_zip, PAGE_N_HEAP, flag | n_heap);
+}
+
+#ifdef UNIV_DEBUG
+/*************************************************************//**
+Gets pointer to nth directory slot, counted back from the page end.
+@return	pointer to dir slot */
+UNIV_INLINE
+page_dir_slot_t*
+page_dir_get_nth_slot(
+/*==================*/
+	const page_t*	page,	/*!< in: index page */
+	ulint		n)	/*!< in: position */
+{
+	const ulint	offs = UNIV_PAGE_SIZE - PAGE_DIR
+		- (n + 1) * PAGE_DIR_SLOT_SIZE;
+
+	ut_ad(page_dir_get_n_slots(page) > n);
+	return((page_dir_slot_t*) page + offs);
+}
+#endif /* UNIV_DEBUG */
+
+/**************************************************************//**
+Asserts that a record pointer lies between PAGE_DATA and PAGE_HEAP_TOP.
+@return	TRUE if succeed */
+UNIV_INLINE
+ibool
+page_rec_check(
+/*===========*/
+	const rec_t*	rec)	/*!< in: record */
+{
+	const page_t*	page;
+
+	ut_a(rec);
+	page = page_align(rec);
+	ut_a(page_offset(rec) <= page_header_get_field(page, PAGE_HEAP_TOP));
+	ut_a(page_offset(rec) >= PAGE_DATA);
+
+	return(TRUE);
+}
+
+/***************************************************************//**
+Gets the record whose offset within the page is stored in a directory slot.
+@return	pointer to record */
+UNIV_INLINE
+const rec_t*
+page_dir_slot_get_rec(
+/*==================*/
+	const page_dir_slot_t*	slot)	/*!< in: directory slot */
+{
+	return(page_align(slot) + mach_read_from_2(slot));
+}
+
+/***************************************************************//**
+Stores the page offset of a record in a directory slot. */
+UNIV_INLINE
+void
+page_dir_slot_set_rec(
+/*==================*/
+	page_dir_slot_t* slot,	/*!< out: directory slot */
+	rec_t*		 rec)	/*!< in: record on the page */
+{
+	ut_ad(page_rec_check(rec));
+
+	mach_write_to_2(slot, page_offset(rec));
+}
+
+/***************************************************************//**
+Reads the n_owned field of the record that a directory slot points to.
+@return	number of records */
+UNIV_INLINE
+ulint
+page_dir_slot_get_n_owned(
+/*======================*/
+	const page_dir_slot_t*	slot)	/*!< in: page directory slot */
+{
+	const rec_t*	owner = page_dir_slot_get_rec(slot);
+
+	/* slot and owner lie within the same page_align()ed page */
+	return(page_rec_is_comp(slot)
+	       ? rec_get_n_owned_new(owner)
+	       : rec_get_n_owned_old(owner));
+}
+
+/***************************************************************//**
+Writes the n_owned field of the record that a directory slot points to. */
+UNIV_INLINE
+void
+page_dir_slot_set_n_owned(
+/*======================*/
+	page_dir_slot_t*slot,	/*!< in/out: directory slot */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	ulint		n)	/*!< in: number of records owned by the slot */
+{
+	rec_t*	owner = (rec_t*) page_dir_slot_get_rec(slot);
+	if (!page_rec_is_comp(slot)) {
+		ut_ad(!page_zip);
+		rec_set_n_owned_old(owner, n);
+	} else {
+		rec_set_n_owned_new(owner, page_zip, n);
+	}
+}
+
+/************************************************************//**
+Calculates the space reserved for the directory slots of n_recs records.
+@return	n_recs * PAGE_DIR_SLOT_SIZE / PAGE_DIR_SLOT_MIN_N_OWNED, rounded up */
+UNIV_INLINE
+ulint
+page_dir_calc_reserved_space(
+/*=========================*/
+	ulint	n_recs)		/*!< in: number of records */
+{
+	const ulint	slot_bytes = PAGE_DIR_SLOT_SIZE * n_recs;
+	return((slot_bytes + PAGE_DIR_SLOT_MIN_N_OWNED - 1)
+	       / PAGE_DIR_SLOT_MIN_N_OWNED);
+}
+
+/************************************************************//**
+Follows the next-record offset stored in a record.
+@return	pointer to next record, or NULL if that offset is 0 */
+UNIV_INLINE
+const rec_t*
+page_rec_get_next_low(
+/*==================*/
+	const rec_t*	rec,	/*!< in: pointer to record */
+	ulint		comp)	/*!< in: nonzero=compact page layout */
+{
+	const page_t*	page;
+	ulint		offs;
+
+	ut_ad(page_rec_check(rec));
+	page = page_align(rec);
+	offs = rec_get_next_offs(rec, comp);
+
+	if (offs == 0) {
+		return(NULL);
+	}
+	if (offs >= UNIV_PAGE_SIZE) {
+		fprintf(stderr,
+			"InnoDB: Next record offset is nonsensical %lu"
+			" in record at offset %lu\n"
+			"InnoDB: rec address %p, space id %lu, page %lu\n",
+			(ulong) offs, (ulong) page_offset(rec),
+			(void*) rec,
+			(ulong) page_get_space_id(page),
+			(ulong) page_get_page_no(page));
+		buf_page_print(page, 0, 0);
+		ut_error;
+	}
+
+	/* The zero offset, which stands for "no next record", and the
+	offsets that do not fit in a page have been dealt with above;
+	control also gets here should ut_error ever return. */
+	return(page + offs);
+}
+
+/************************************************************//**
+Gets the pointer to the next record on the page (non-const variant).
+@return	pointer to next record; NULL if the next-record offset is 0 */
+UNIV_INLINE
+rec_t*
+page_rec_get_next(
+/*==============*/
+	rec_t*	rec)	/*!< in: pointer to record */
+{
+	return((rec_t*) page_rec_get_next_low(rec, page_rec_is_comp(rec)));
+}
+
+/************************************************************//**
+Gets the pointer to the next record on the page (const variant).
+@return	pointer to next record; NULL if the next-record offset is 0 */
+UNIV_INLINE
+const rec_t*
+page_rec_get_next_const(
+/*====================*/
+	const rec_t*	rec)	/*!< in: pointer to record */
+{
+	return(page_rec_get_next_low(rec, page_rec_is_comp(rec)));
+}
+
+/************************************************************//**
+Finds the first record after rec that is not delete-marked. The search
+ends at the supremum record, which is what gets returned when every
+record after rec is delete-marked.
+@return	pointer to next non delete-marked record or pointer to supremum */
+UNIV_INLINE
+const rec_t*
+page_rec_get_next_non_del_marked(
+/*=============================*/
+	const rec_t*	rec)	/*!< in: pointer to record */
+{
+	const ulint	comp = page_rec_is_comp(rec);
+	const rec_t*	cur = page_rec_get_next_const(rec);
+
+	/* step over the delete-marked records; the supremum is never
+	tested for the flag */
+	while (!page_rec_is_supremum(cur)
+	       && rec_get_deleted_flag(cur, comp)) {
+		cur = page_rec_get_next_const(cur);
+	}
+
+	return(cur);
+}
+
+/************************************************************//**
+Stores in rec the page offset of next, or 0 if next is NULL. */
+UNIV_INLINE
+void
+page_rec_set_next(
+/*==============*/
+	rec_t*		rec,	/*!< in: pointer to record,
+				must not be page supremum */
+	const rec_t*	next)	/*!< in: pointer to next record,
+				must not be page infimum */
+{
+	ulint	next_offs = 0;
+
+	ut_ad(page_rec_check(rec));
+	ut_ad(!page_rec_is_supremum(rec));
+	ut_ad(rec != next);
+
+	ut_ad(!next || !page_rec_is_infimum(next));
+	ut_ad(!next || page_align(rec) == page_align(next));
+	if (next != NULL) {
+		next_offs = page_offset(next);
+	}
+	if (!page_rec_is_comp(rec)) {
+		rec_set_next_offs_old(rec, next_offs);
+	} else {
+		rec_set_next_offs_new(rec, next_offs);
+	}
+}
+
+/************************************************************//**
+Gets the pointer to the previous record, by walking the record list.
+@return	pointer to previous record */
+UNIV_INLINE
+const rec_t*
+page_rec_get_prev_const(
+/*====================*/
+	const rec_t*	rec)	/*!< in: pointer to record, must not be page
+				infimum */
+{
+	const page_dir_slot_t*	slot;
+	ulint			slot_no;
+	const rec_t*		rec2;
+	const rec_t*		prev_rec = NULL;
+	const page_t*		page;
+
+	ut_ad(page_rec_check(rec));
+
+	page = page_align(rec);
+
+	ut_ad(!page_rec_is_infimum(rec));
+	/* number of the slot reported by page_dir_find_owner_slot() */
+	slot_no = page_dir_find_owner_slot(rec);
+
+	ut_a(slot_no != 0);
+	/* the walk starts from the record of the slot before that one */
+	slot = page_dir_get_nth_slot(page, slot_no - 1);
+
+	rec2 = page_dir_slot_get_rec(slot);
+	/* follow the next pointers up to rec, remembering its predecessor */
+	if (page_is_comp(page)) {
+		while (rec != rec2) {
+			prev_rec = rec2;
+			rec2 = page_rec_get_next_low(rec2, TRUE);
+		}
+	} else {
+		while (rec != rec2) {
+			prev_rec = rec2;
+			rec2 = page_rec_get_next_low(rec2, FALSE);
+		}
+	}
+	/* prev_rec is still NULL if the walk started at rec itself */
+	ut_a(prev_rec);
+
+	return(prev_rec);
+}
+
+/************************************************************//**
+Gets the pointer to the previous record (non-const variant).
+@return	pointer to previous record */
+UNIV_INLINE
+rec_t*
+page_rec_get_prev(
+/*==============*/
+	rec_t*	rec)	/*!< in: pointer to record, must not be page
+			infimum */
+{
+	return((rec_t*) page_rec_get_prev_const(rec));
+}
+
+/***************************************************************//**
+Advances from rec to the first record whose n_owned field is nonzero.
+@return	the owner record */
+UNIV_INLINE
+rec_t*
+page_rec_find_owner_rec(
+/*====================*/
+	rec_t*	rec)	/*!< in: the physical record */
+{
+	ut_ad(page_rec_check(rec));
+
+	if (!page_rec_is_comp(rec)) {
+		while (!rec_get_n_owned_old(rec)) {
+			rec = page_rec_get_next(rec);
+		}
+		return(rec);
+	}
+
+	while (!rec_get_n_owned_new(rec)) {
+		rec = page_rec_get_next(rec);
+	}
+	return(rec);
+}
+
+/**********************************************************//**
+Returns the size of the fixed record header (independent of record size).
+@return	REC_N_NEW_EXTRA_BYTES or REC_N_OLD_EXTRA_BYTES */
+UNIV_INLINE
+ulint
+page_rec_get_base_extra_size(
+/*=========================*/
+	const rec_t*	rec)	/*!< in: physical record */
+{
+#if REC_N_NEW_EXTRA_BYTES + 1 != REC_N_OLD_EXTRA_BYTES
+# error "REC_N_NEW_EXTRA_BYTES + 1 != REC_N_OLD_EXTRA_BYTES"
+#endif
+	return(page_rec_is_comp(rec)
+	       ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES);
+}
+
+/************************************************************//**
+Computes the total size of the records in the record list, leaving out
+the infimum and supremum records: heap top - supremum end - garbage.
+@return	data in bytes */
+UNIV_INLINE
+ulint
+page_get_data_size(
+/*===============*/
+	const page_t*	page)	/*!< in: index page */
+{
+	const ulint	supremum_end = page_is_comp(page)
+		? PAGE_NEW_SUPREMUM_END
+		: PAGE_OLD_SUPREMUM_END;
+	ulint		ret;
+
+	ret = page_header_get_field(page, PAGE_HEAP_TOP);
+	ret -= supremum_end;
+	ret -= page_header_get_field(page, PAGE_GARBAGE);
+
+	ut_ad(ret < UNIV_PAGE_SIZE);
+	return(ret);
+}
+
+
+/************************************************************//**
+Takes the first record off the free list (PAGE_FREE) of an index page. */
+UNIV_INLINE
+void
+page_mem_alloc_free(
+/*================*/
+	page_t*		page,	/*!< in/out: index page */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page with enough
+				space available for inserting the record,
+				or NULL */
+	rec_t*		next_rec,/*!< in: pointer to the new head of the
+				free record list */
+	ulint		need)	/*!< in: number of bytes allocated */
+{
+	ulint		garbage;
+
+#ifdef UNIV_DEBUG
+	const rec_t*	old_rec	= page_header_get_ptr(page, PAGE_FREE);
+	ulint		next_offs;
+	/* next_rec has to be the successor of the current list head */
+	ut_ad(old_rec);
+	next_offs = rec_get_next_offs(old_rec, page_is_comp(page));
+	ut_ad(next_rec == (next_offs ? page + next_offs : NULL));
+#endif
+	/* make next_rec the head of the free list */
+	page_header_set_ptr(page, page_zip, PAGE_FREE, next_rec);
+
+	garbage = page_header_get_field(page, PAGE_GARBAGE);
+	ut_ad(garbage >= need);
+	/* the allocated bytes no longer count as garbage */
+	page_header_set_field(page, page_zip, PAGE_GARBAGE, garbage - need);
+}
+
+/*************************************************************//**
+Calculates how much space a page would have free if it were emptied.
+@return	free space */
+UNIV_INLINE
+ulint
+page_get_free_space_of_empty(
+/*=========================*/
+	ulint	comp)		/*!< in: nonzero=compact page layout */
+{
+	const ulint	supremum_end = comp
+		? PAGE_NEW_SUPREMUM_END
+		: PAGE_OLD_SUPREMUM_END;
+
+	/* The whole page, less the part that ends with the supremum
+	record, less PAGE_DIR bytes, less two directory slots. The
+	two layouts differ in the first of these only. */
+	return((ulint)(UNIV_PAGE_SIZE
+		       - supremum_end
+		       - PAGE_DIR
+		       - 2 * PAGE_DIR_SLOT_SIZE));
+}
+
+#ifndef UNIV_HOTBACKUP
+/***********************************************************************//**
+Writes, with redo logging, a 32-bit field of a data dictionary record. */
+UNIV_INLINE
+void
+page_rec_write_field(
+/*=================*/
+	rec_t*	rec,	/*!< in/out: record to update */
+	ulint	i,	/*!< in: index of the field to update */
+	ulint	val,	/*!< in: value to write */
+	mtr_t*	mtr)	/*!< in/out: mini-transaction */
+{
+	ulint	len;
+	byte*	data = rec_get_nth_field_old(rec, i, &len);
+
+	/* the write below is of type MLOG_4BYTES, so the field is
+	expected to be exactly 4 bytes long */
+	ut_ad(len == 4);
+
+	mlog_write_ulint(data, val, MLOG_4BYTES, mtr);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/************************************************************//**
+Each user record on a page, and also the deleted user records in the heap
+takes its size plus the fraction of the dir cell size /
+PAGE_DIR_SLOT_MIN_N_OWNED bytes for it. If the sum of these exceeds the
+value of page_get_free_space_of_empty, the insert is impossible, otherwise
+it is allowed. This function returns the maximum combined size of records
+which can be inserted on top of the record heap.
+@return	maximum combined size for inserted records */
+UNIV_INLINE
+ulint
+page_get_max_insert_size(
+/*=====================*/
+	const page_t*	page,	/*!< in: index page */
+	ulint		n_recs)	/*!< in: number of records */
+{
+	/* nonzero for the compact page layout */
+	const ulint	comp = page_is_comp(page);
+	ulint		occupied;
+	ulint		free_space;
+
+	/* Bytes between the end of the supremum record and the top of
+	the record heap; where the supremum ends depends on the page
+	layout. */
+	occupied = page_header_get_field(page, PAGE_HEAP_TOP);
+	occupied -= comp
+		? PAGE_NEW_SUPREMUM_END
+		: PAGE_OLD_SUPREMUM_END;
+
+	/* Directory space. The 'n_recs +' part reserves directory space
+	for the new inserted records; the '- 2' excludes page infimum and
+	supremum records. */
+	occupied += page_dir_calc_reserved_space(
+		n_recs + page_dir_get_n_heap(page) - 2);
+
+	/* What an empty page of the same layout could hold. */
+	free_space = page_get_free_space_of_empty(comp);
+
+	if (occupied <= free_space) {
+
+		return(free_space - occupied);
+	}
+
+	/* The page is already filled beyond that limit. */
+	return(0);
+}
+
+/************************************************************//**
+Computes the maximum combined size of the records that could be inserted
+on top of the record heap, provided that the page is reorganized first.
+@return	maximum combined size for inserted records */
+UNIV_INLINE
+ulint
+page_get_max_insert_size_after_reorganize(
+/*======================================*/
+	const page_t*	page,	/*!< in: index page */
+	ulint		n_recs)	/*!< in: number of records */
+{
+	const ulint	free_space
+		= page_get_free_space_of_empty(page_is_comp(page));
+	ulint		occupied;
+
+	/* the record data, plus the directory space for the records
+	on the page and the n_recs new ones */
+	occupied = page_get_data_size(page);
+	occupied += page_dir_calc_reserved_space(
+		n_recs + page_get_n_recs(page));
+
+	if (occupied <= free_space) {
+		return(free_space - occupied);
+	}
+	return(0);
+}
+
+/************************************************************//**
+Puts a record to the head of the free list (PAGE_FREE) of a page. */
+UNIV_INLINE
+void
+page_mem_free(
+/*==========*/
+	page_t*			page,		/*!< in/out: index page */
+	page_zip_des_t*		page_zip,	/*!< in/out: compressed page,
+						or NULL */
+	rec_t*			rec,		/*!< in: pointer to the
+						(origin of) record */
+	const dict_index_t*	index,		/*!< in: index of rec */
+	const ulint*		offsets)	/*!< in: array returned by
+						rec_get_offsets() */
+{
+	rec_t*		free;
+	ulint		garbage;
+
+	ut_ad(rec_offs_validate(rec, index, offsets));
+	free = page_header_get_ptr(page, PAGE_FREE);
+	/* link rec in front of the old list head */
+	page_rec_set_next(rec, free);
+	page_header_set_ptr(page, page_zip, PAGE_FREE, rec);
+
+	garbage = page_header_get_field(page, PAGE_GARBAGE);
+	/* the whole size of the record is added to the garbage count */
+	page_header_set_field(page, page_zip, PAGE_GARBAGE,
+			      garbage + rec_offs_size(offsets));
+	/* without page_zip, PAGE_N_RECS is decremented right here */
+	if (page_zip) {
+		page_zip_dir_delete(page_zip, rec, index, offsets, free);
+	} else {
+		page_header_set_field(page, page_zip, PAGE_N_RECS,
+				      page_get_n_recs(page) - 1);
+	}
+}
+
+#ifdef UNIV_MATERIALIZE
+#undef UNIV_INLINE
+#define UNIV_INLINE	UNIV_INLINE_ORIGINAL
+#endif
diff --git a/storage/innobase/include/page0types.h b/storage/innobase/include/page0types.h
new file mode 100644
index 00000000000..95143a4bb44
--- /dev/null
+++ b/storage/innobase/include/page0types.h
@@ -0,0 +1,169 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/page0types.h
+Index page routines
+
+Created 2/2/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef page0types_h
+#define page0types_h
+
+#include <map>
+/* after <map>, so that namespace std is declared before it is named */
+using namespace std;
+
+#include "univ.i"
+#include "dict0types.h"
+#include "mtr0types.h"
+
+/** Eliminates a name collision on HP-UX */
+#define page_t	   ib_page_t
+/** Type of the index page; pages are addressed as arrays of byte */
+typedef	byte		page_t;
+/** Index page cursor (only forward-declared here) */
+struct page_cur_t;
+
+/** Compressed index page; also an array of byte */
+typedef byte		page_zip_t;
+
+/* The following definitions would better belong to page0zip.h,
+but we cannot include page0zip.h from rem0rec.ic, because
+page0*.h includes rem0rec.h and may include rem0rec.ic. */
+
+/** Number of bits in page_zip_des_t::ssize, the compressed page shift size */
+#define PAGE_ZIP_SSIZE_BITS 3
+
+/** Maximum compressed page shift size */
+#define PAGE_ZIP_SSIZE_MAX	\
+	(UNIV_ZIP_SIZE_SHIFT_MAX - UNIV_ZIP_SIZE_SHIFT_MIN + 1)
+
+/* Make sure there are enough bits available to store the maximum zip
+ssize, which is the number of shifts from 512. */
+#if PAGE_ZIP_SSIZE_MAX >= (1 << PAGE_ZIP_SSIZE_BITS)
+# error "PAGE_ZIP_SSIZE_MAX >= (1 << PAGE_ZIP_SSIZE_BITS)"
+#endif
+
+/** Compressed page descriptor (two members exist only with UNIV_DEBUG) */
+struct page_zip_des_t
+{
+	page_zip_t*	data;		/*!< compressed page data */
+
+#ifdef UNIV_DEBUG
+	unsigned	m_start:16;	/*!< start offset of modification log */
+	bool		m_external;	/*!< Allocated externally, not from the
+					buffer pool */
+#endif /* UNIV_DEBUG */
+	unsigned	m_end:16;	/*!< end offset of modification log */
+	unsigned	m_nonempty:1;	/*!< TRUE if the modification log
+					is not empty */
+	unsigned	n_blobs:12;	/*!< number of externally stored
+					columns on the page; the maximum
+					is 744 on a 16 KiB page */
+	unsigned	ssize:PAGE_ZIP_SSIZE_BITS;
+					/*!< 0 or compressed page shift size;
+					the size in bytes is
+					(UNIV_ZIP_SIZE_MIN >> 1) << ssize. */
+};
+
+/** Compression statistics, kept per page size and per index */
+struct page_zip_stat_t {
+	/** Number of page compressions */
+	ulint		compressed;
+	/** Number of successful page compressions */
+	ulint		compressed_ok;
+	/** Number of page decompressions */
+	ulint		decompressed;
+	/** Duration of page compressions in microseconds */
+	ib_uint64_t	compressed_usec;
+	/** Duration of page decompressions in microseconds */
+	ib_uint64_t	decompressed_usec;
+	page_zip_stat_t() :
+		/* Zero every counter: an expression like
+		stlmap[key].compressed++ inserts a default-constructed
+		element when "key" is missing, and must start from 0. */
+		compressed(0),
+		compressed_ok(0),
+		decompressed(0),
+		compressed_usec(0),
+		decompressed_usec(0)
+	{ }
+};
+
+/** Compression statistics per index: index_id_t -> page_zip_stat_t */
+typedef std::map<index_id_t, page_zip_stat_t>	page_zip_stat_per_index_t;
+
+/** Statistics on compression, indexed by page_zip_des_t::ssize - 1 */
+extern page_zip_stat_t				page_zip_stat[PAGE_ZIP_SSIZE_MAX];
+/** Statistics on compression, indexed by dict_index_t::id */
+extern page_zip_stat_per_index_t		page_zip_stat_per_index;
+extern ib_mutex_t				page_zip_stat_per_index_mutex;
+#ifdef HAVE_PSI_INTERFACE
+extern mysql_pfs_key_t				page_zip_stat_per_index_mutex_key;
+#endif /* HAVE_PSI_INTERFACE */
+
+/**********************************************************************//**
+Write the "deleted" flag of a record on a compressed page.  The flag must
+already have been written on the uncompressed page.  No NULL arguments. */
+UNIV_INTERN
+void
+page_zip_rec_set_deleted(
+/*=====================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	const byte*	rec,	/*!< in: record on the uncompressed page */
+	ulint		flag)	/*!< in: the deleted flag (nonzero=TRUE) */
+	__attribute__((nonnull));
+
+/**********************************************************************//**
+Write the "owned" flag of a record on a compressed page.  The n_owned field
+must already have been written on the uncompressed page.  No NULL arguments. */
+UNIV_INTERN
+void
+page_zip_rec_set_owned(
+/*===================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	const byte*	rec,	/*!< in: record on the uncompressed page */
+	ulint		flag)	/*!< in: the owned flag (nonzero=TRUE) */
+	__attribute__((nonnull));
+
+/**********************************************************************//**
+Shift the dense page directory when a record is deleted. */
+UNIV_INTERN
+void
+page_zip_dir_delete(
+/*================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	byte*		rec,	/*!< in: deleted record */
+	const dict_index_t* index,/*!< in: index of rec; const, as callers
+				such as page_mem_free() hold a const pointer */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec) */
+	const byte*	free)	/*!< in: previous start of the free list */
+	__attribute__((nonnull(1,2,3,4)));
+
+/**********************************************************************//**
+Add a slot to the dense page directory.  page_zip must not be NULL. */
+UNIV_INTERN
+void
+page_zip_dir_add_slot(
+/*==================*/
+	page_zip_des_t*	page_zip,	/*!< in/out: compressed page */
+	ulint		is_clustered)	/*!< in: nonzero for clustered index,
+					zero for others */
+	__attribute__((nonnull));
+#endif
diff --git a/storage/innobase/include/page0zip.h b/storage/innobase/include/page0zip.h
new file mode 100644
index 00000000000..9d3b78ed2fc
--- /dev/null
+++ b/storage/innobase/include/page0zip.h
@@ -0,0 +1,538 @@
+/*****************************************************************************
+
+Copyright (c) 2005, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/page0zip.h
+Compressed page interface
+
+Created June 2005 by Marko Makela
+*******************************************************/
+
+#ifndef page0zip_h
+#define page0zip_h
+
+#ifdef UNIV_MATERIALIZE
+# undef UNIV_INLINE
+# define UNIV_INLINE
+#endif
+
+#include "mtr0types.h"
+#include "page0types.h"
+#include "buf0types.h"
+#include "dict0types.h"
+#include "srv0srv.h"
+#include "trx0types.h"
+#include "mem0mem.h"
+
+/* Compression level to be used by zlib. Settable by user. */
+extern uint	page_zip_level;
+
+/* Default compression level. */
+#define DEFAULT_COMPRESSION_LEVEL	6
+
+/* Whether or not to log compressed page images to avoid possible
+compression algorithm changes in zlib. */
+extern my_bool	page_zip_log_pages;
+
+/**********************************************************************//**
+Determine the size of a compressed page in bytes.
+@return	size in bytes */
+UNIV_INLINE
+ulint
+page_zip_get_size(
+/*==============*/
+	const page_zip_des_t*	page_zip)	/*!< in: compressed page */
+	__attribute__((nonnull, pure));
+/**********************************************************************//**
+Set the size of a compressed page in bytes. */
+UNIV_INLINE
+void
+page_zip_set_size(
+/*==============*/
+	page_zip_des_t*	page_zip,	/*!< in/out: compressed page */
+	ulint		size);		/*!< in: size in bytes */
+
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Determine if a record is so big that it needs to be stored externally.
+@return	FALSE if the entire record can be stored locally on the page */
+UNIV_INLINE
+ibool
+page_zip_rec_needs_ext(
+/*===================*/
+	ulint	rec_size,	/*!< in: length of the record in bytes */
+	ulint	comp,		/*!< in: nonzero=compact format */
+	ulint	n_fields,	/*!< in: number of fields in the record;
+				ignored if zip_size == 0 */
+	ulint	zip_size)	/*!< in: compressed page size in bytes, or 0 */
+	__attribute__((const));
+
+/**********************************************************************//**
+Determine the guaranteed free space on an empty page.
+@return	minimum payload size on the page */
+UNIV_INTERN
+ulint
+page_zip_empty_size(
+/*================*/
+	ulint	n_fields,	/*!< in: number of columns in the index */
+	ulint	zip_size)	/*!< in: compressed page size in bytes */
+	__attribute__((const));
+#endif /* !UNIV_HOTBACKUP */
+
+/**********************************************************************//**
+Initialize a compressed page descriptor. */
+UNIV_INLINE
+void
+page_zip_des_init(
+/*==============*/
+	page_zip_des_t*	page_zip);	/*!< in/out: compressed page
+					descriptor */
+
+/**********************************************************************//**
+Configure the zlib allocator to use the given memory heap. */
+UNIV_INTERN
+void
+page_zip_set_alloc(
+/*===============*/
+	void*		stream,		/*!< in/out: zlib stream */
+	mem_heap_t*	heap);		/*!< in: memory heap to use */
+
+/**********************************************************************//**
+Compress a page.
+@return TRUE on success, FALSE on failure; page_zip will be left
+intact on failure. */
+UNIV_INTERN
+ibool
+page_zip_compress(
+/*==============*/
+	page_zip_des_t*	page_zip,/*!< in: size; out: data, n_blobs,
+				m_start, m_end, m_nonempty */
+	const page_t*	page,	/*!< in: uncompressed page */
+	dict_index_t*	index,	/*!< in: index of the B-tree node */
+	ulint		level,	/*!< in: compression level */
+	mtr_t*		mtr)	/*!< in: mini-transaction, or NULL */
+	__attribute__((nonnull(1,2,3)));
+
+/**********************************************************************//**
+Decompress a page.  This function should tolerate errors on the compressed
+page.  Instead of letting assertions fail, it will return FALSE if an
+inconsistency is detected.
+@return	TRUE on success, FALSE on failure */
+UNIV_INTERN
+ibool
+page_zip_decompress(
+/*================*/
+	page_zip_des_t*	page_zip,/*!< in: data, ssize;
+				out: m_start, m_end, m_nonempty, n_blobs */
+	page_t*		page,	/*!< out: uncompressed page, may be trashed */
+	ibool		all)	/*!< in: TRUE=decompress the whole page;
+				FALSE=verify but do not copy some
+				page header fields that should not change
+				after page creation */
+	__attribute__((nonnull(1,2)));
+
+#ifdef UNIV_DEBUG
+/**********************************************************************//**
+Validate a compressed page descriptor.
+@return	TRUE if ok */
+UNIV_INLINE
+ibool
+page_zip_simple_validate(
+/*=====================*/
+	const page_zip_des_t*	page_zip);	/*!< in: compressed page
+						descriptor */
+#endif /* UNIV_DEBUG */
+
+#ifdef UNIV_ZIP_DEBUG
+/**********************************************************************//**
+Check that the compressed and decompressed pages match.
+@return	TRUE if valid, FALSE if not */
+UNIV_INTERN
+ibool
+page_zip_validate_low(
+/*==================*/
+	const page_zip_des_t*	page_zip,/*!< in: compressed page */
+	const page_t*		page,	/*!< in: uncompressed page */
+	const dict_index_t*	index,	/*!< in: index of the page, if known */
+	ibool			sloppy)	/*!< in: FALSE=strict,
+					TRUE=ignore the MIN_REC_FLAG */
+	__attribute__((nonnull(1,2)));
+/**********************************************************************//**
+Check that the compressed and decompressed pages match. */
+UNIV_INTERN
+ibool
+page_zip_validate(
+/*==============*/
+	const page_zip_des_t*	page_zip,/*!< in: compressed page */
+	const page_t*		page,	/*!< in: uncompressed page */
+	const dict_index_t*	index)	/*!< in: index of the page, if known */
+	__attribute__((nonnull(1,2)));
+#endif /* UNIV_ZIP_DEBUG */
+
+/**********************************************************************//**
+Determine how big a record can be inserted without recompressing the page.
+@return a positive number indicating the maximum size of a record
+whose insertion is guaranteed to succeed, or zero or negative */
+UNIV_INLINE
+lint
+page_zip_max_ins_size(
+/*==================*/
+	const page_zip_des_t*	page_zip,/*!< in: compressed page */
+	ibool			is_clust)/*!< in: TRUE if clustered index */
+	__attribute__((nonnull, pure));
+
+/**********************************************************************//**
+Determine if enough space is available in the modification log.
+@return	TRUE if page_zip_write_rec() will succeed */
+UNIV_INLINE
+ibool
+page_zip_available(
+/*===============*/
+	const page_zip_des_t*	page_zip,/*!< in: compressed page */
+	ibool			is_clust,/*!< in: TRUE if clustered index */
+	ulint			length,	/*!< in: combined size of the record */
+	ulint			create)	/*!< in: nonzero=add the record to
+					the heap */
+	__attribute__((nonnull, pure));
+
+/**********************************************************************//**
+Write data to the uncompressed header portion of a page.  The data must
+already have been written to the uncompressed page. */
+UNIV_INLINE
+void
+page_zip_write_header(
+/*==================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	const byte*	str,	/*!< in: address on the uncompressed page */
+	ulint		length,	/*!< in: length of the data */
+	mtr_t*		mtr)	/*!< in: mini-transaction, or NULL */
+	__attribute__((nonnull(1,2)));
+
+/**********************************************************************//**
+Write an entire record on the compressed page.  The data must already
+have been written to the uncompressed page. */
+UNIV_INTERN
+void
+page_zip_write_rec(
+/*===============*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	const byte*	rec,	/*!< in: record being written */
+	dict_index_t*	index,	/*!< in: the index the record belongs to */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
+	ulint		create)	/*!< in: nonzero=insert, zero=update */
+	__attribute__((nonnull));
+
+/***********************************************************//**
+Parses a log record of writing a BLOB pointer of a record.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+page_zip_parse_write_blob_ptr(
+/*==========================*/
+	byte*		ptr,	/*!< in: redo log buffer */
+	byte*		end_ptr,/*!< in: redo log buffer end */
+	page_t*		page,	/*!< in/out: uncompressed page */
+	page_zip_des_t*	page_zip);/*!< in/out: compressed page */
+
+/**********************************************************************//**
+Write a BLOB pointer of a record on the leaf page of a clustered index.
+The information must already have been updated on the uncompressed page. */
+UNIV_INTERN
+void
+page_zip_write_blob_ptr(
+/*====================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	const byte*	rec,	/*!< in: record whose data is being
+				written */
+	dict_index_t*	index,	/*!< in: index of the page */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
+	ulint		n,	/*!< in: column index */
+	mtr_t*		mtr)	/*!< in: mini-transaction handle,
+				or NULL if no logging is needed */
+	__attribute__((nonnull(1,2,3,4)));
+
+/***********************************************************//**
+Parses a log record of writing the node pointer of a record.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+page_zip_parse_write_node_ptr(
+/*==========================*/
+	byte*		ptr,	/*!< in: redo log buffer */
+	byte*		end_ptr,/*!< in: redo log buffer end */
+	page_t*		page,	/*!< in/out: uncompressed page */
+	page_zip_des_t*	page_zip);/*!< in/out: compressed page */
+
+/**********************************************************************//**
+Write the node pointer of a record on a non-leaf compressed page. */
+UNIV_INTERN
+void
+page_zip_write_node_ptr(
+/*====================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	byte*		rec,	/*!< in/out: record */
+	ulint		size,	/*!< in: data size of rec */
+	ulint		ptr,	/*!< in: node pointer */
+	mtr_t*		mtr)	/*!< in: mini-transaction, or NULL */
+	__attribute__((nonnull(1,2)));
+
+/**********************************************************************//**
+Write the trx_id and roll_ptr of a record on a B-tree leaf node page. */
+UNIV_INTERN
+void
+page_zip_write_trx_id_and_roll_ptr(
+/*===============================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	byte*		rec,	/*!< in/out: record */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
+	ulint		trx_id_col,/*!< in: column number of TRX_ID in rec */
+	trx_id_t	trx_id,	/*!< in: transaction identifier */
+	roll_ptr_t	roll_ptr)/*!< in: roll_ptr */
+	__attribute__((nonnull));
+
+/**********************************************************************//**
+Write the "deleted" flag of a record on a compressed page.  The flag must
+already have been written on the uncompressed page. */
+UNIV_INTERN
+void
+page_zip_rec_set_deleted(
+/*=====================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	const byte*	rec,	/*!< in: record on the uncompressed page */
+	ulint		flag)	/*!< in: the deleted flag (nonzero=TRUE) */
+	__attribute__((nonnull));
+
+/**********************************************************************//**
+Write the "owned" flag of a record on a compressed page.  The n_owned field
+must already have been written on the uncompressed page. */
+UNIV_INTERN
+void
+page_zip_rec_set_owned(
+/*===================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	const byte*	rec,	/*!< in: record on the uncompressed page */
+	ulint		flag)	/*!< in: the owned flag (nonzero=TRUE) */
+	__attribute__((nonnull));
+
+/**********************************************************************//**
+Insert a record to the dense page directory. */
+UNIV_INTERN
+void
+page_zip_dir_insert(
+/*================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	const byte*	prev_rec,/*!< in: record after which to insert */
+	const byte*	free_rec,/*!< in: record from which rec was
+				allocated, or NULL */
+	byte*		rec);	/*!< in: record to insert */
+
+/**********************************************************************//**
+Shift the dense page directory and the array of BLOB pointers
+when a record is deleted. */
+UNIV_INTERN
+void
+page_zip_dir_delete(
+/*================*/
+	page_zip_des_t*		page_zip,	/*!< in/out: compressed page */
+	byte*			rec,		/*!< in: deleted record */
+	const dict_index_t*	index,		/*!< in: index of rec */
+	const ulint*		offsets,	/*!< in: rec_get_offsets(rec) */
+	const byte*		free)		/*!< in: previous start of
+						the free list */
+	__attribute__((nonnull(1,2,3,4)));
+
+/**********************************************************************//**
+Add a slot to the dense page directory. */
+UNIV_INTERN
+void
+page_zip_dir_add_slot(
+/*==================*/
+	page_zip_des_t*	page_zip,	/*!< in/out: compressed page */
+	ulint		is_clustered)	/*!< in: nonzero for clustered index,
+					zero for others */
+	__attribute__((nonnull));
+
+/***********************************************************//**
+Parses a log record of writing to the header of a page.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+page_zip_parse_write_header(
+/*========================*/
+	byte*		ptr,	/*!< in: redo log buffer */
+	byte*		end_ptr,/*!< in: redo log buffer end */
+	page_t*		page,	/*!< in/out: uncompressed page */
+	page_zip_des_t*	page_zip);/*!< in/out: compressed page */
+
+/**********************************************************************//**
+Write data to the uncompressed header portion of a page.  The data must
+already have been written to the uncompressed page.
+However, the data portion of the uncompressed page may differ from
+the compressed page when a record is being inserted in
+page_cur_insert_rec_zip(). */
+UNIV_INLINE
+void
+page_zip_write_header(
+/*==================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	const byte*	str,	/*!< in: address on the uncompressed page */
+	ulint		length,	/*!< in: length of the data */
+	mtr_t*		mtr)	/*!< in: mini-transaction, or NULL */
+	__attribute__((nonnull(1,2)));
+
+/**********************************************************************//**
+Reorganize and compress a page.  This is a low-level operation for
+compressed pages, to be used when page_zip_compress() fails.
+On success, a redo log entry MLOG_ZIP_PAGE_COMPRESS will be written.
+The function btr_page_reorganize() should be preferred whenever possible.
+IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a
+non-clustered index, the caller must update the insert buffer free
+bits in the same mini-transaction in such a way that the modification
+will be redo-logged.
+@return TRUE on success, FALSE on failure; page_zip will be left
+intact on failure, but page will be overwritten. */
+UNIV_INTERN
+ibool
+page_zip_reorganize(
+/*================*/
+	buf_block_t*	block,	/*!< in/out: page with compressed page;
+				on the compressed page, in: size;
+				out: data, n_blobs,
+				m_start, m_end, m_nonempty */
+	dict_index_t*	index,	/*!< in: index of the B-tree node */
+	mtr_t*		mtr)	/*!< in: mini-transaction */
+	__attribute__((nonnull));
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Copy the records of a page byte for byte.  Do not copy the page header
+or trailer, except those B-tree header fields that are directly
+related to the storage of records.  Also copy PAGE_MAX_TRX_ID.
+NOTE: The caller must update the lock table and the adaptive hash index. */
+UNIV_INTERN
+void
+page_zip_copy_recs(
+/*===============*/
+	page_zip_des_t*		page_zip,	/*!< out: copy of src_zip
+						(n_blobs, m_start, m_end,
+						m_nonempty, data[0..size-1]) */
+	page_t*			page,		/*!< out: copy of src */
+	const page_zip_des_t*	src_zip,	/*!< in: compressed page */
+	const page_t*		src,		/*!< in: page */
+	dict_index_t*		index,		/*!< in: index of the B-tree */
+	mtr_t*			mtr)		/*!< in: mini-transaction */
+	__attribute__((nonnull));
+#endif /* !UNIV_HOTBACKUP */
+
+/**********************************************************************//**
+Parses a log record of compressing an index page.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+page_zip_parse_compress(
+/*====================*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	page_t*		page,	/*!< out: uncompressed page */
+	page_zip_des_t*	page_zip)/*!< out: compressed page */
+	__attribute__((nonnull(1,2)));
+
+/**********************************************************************//**
+Calculate the compressed page checksum.
+@return	page checksum */
+UNIV_INTERN
+ulint
+page_zip_calc_checksum(
+/*===================*/
+        const void*     data,   /*!< in: compressed page */
+        ulint           size,   /*!< in: size of compressed page */
+	srv_checksum_algorithm_t algo) /*!< in: algorithm to use */
+	__attribute__((nonnull));
+
+/**********************************************************************//**
+Verify a compressed page's checksum.
+@return	TRUE if the stored checksum is valid according to the value of
+innodb_checksum_algorithm */
+UNIV_INTERN
+ibool
+page_zip_verify_checksum(
+/*=====================*/
+	const void*	data,	/*!< in: compressed page */
+	ulint		size);	/*!< in: size of compressed page */
+/**********************************************************************//**
+Write a log record of compressing an index page without the data on the page. */
+UNIV_INLINE
+void
+page_zip_compress_write_log_no_data(
+/*================================*/
+	ulint		level,	/*!< in: compression level */
+	const page_t*	page,	/*!< in: page that is compressed */
+	dict_index_t*	index,	/*!< in: index */
+	mtr_t*		mtr);	/*!< in: mtr */
+/**********************************************************************//**
+Parses a log record of compressing an index page without the data.
+@return	end of log record or NULL */
+UNIV_INLINE
+byte*
+page_zip_parse_compress_no_data(
+/*============================*/
+	byte*		ptr,		/*!< in: buffer */
+	byte*		end_ptr,	/*!< in: buffer end */
+	page_t*		page,		/*!< in: uncompressed page */
+	page_zip_des_t*	page_zip,	/*!< out: compressed page */
+	dict_index_t*	index)		/*!< in: index */
+	__attribute__((nonnull(1,2)));
+
+/**********************************************************************//**
+Reset the counters used for filling
+INFORMATION_SCHEMA.innodb_cmp_per_index. */
+UNIV_INLINE
+void
+page_zip_reset_stat_per_index();
+/*===========================*/
+
+#ifndef UNIV_HOTBACKUP
+/** Check if a pointer to an uncompressed page matches a compressed page.
+When we IMPORT a tablespace the blocks and accompanying frames are allocated
+from outside the buffer pool.
+@param ptr	pointer to an uncompressed page frame
+@param page_zip	compressed page descriptor
+@return		TRUE if ptr and page_zip refer to the same block */
+# define PAGE_ZIP_MATCH(ptr, page_zip)					\
+	(((page_zip)->m_external					\
+	  && (page_align(ptr) + UNIV_PAGE_SIZE == (page_zip)->data))	\
+	  || buf_frame_get_page_zip(ptr) == (page_zip))
+#else /* !UNIV_HOTBACKUP */
+/** Check if a pointer to an uncompressed page matches a compressed page.
+@param ptr	pointer to an uncompressed page frame
+@param page_zip	compressed page descriptor
+@return		TRUE if ptr and page_zip refer to the same block */
+# define PAGE_ZIP_MATCH(ptr, page_zip)				\
+	(page_align(ptr) + UNIV_PAGE_SIZE == (page_zip)->data)
+#endif /* !UNIV_HOTBACKUP */
+
+#ifdef UNIV_MATERIALIZE
+# undef UNIV_INLINE
+# define UNIV_INLINE	UNIV_INLINE_ORIGINAL
+#endif
+
+#ifndef UNIV_NONINL
+# include "page0zip.ic"
+#endif
+
+#endif /* page0zip_h */
diff --git a/storage/innobase/include/page0zip.ic b/storage/innobase/include/page0zip.ic
new file mode 100644
index 00000000000..6c7d8cd32c7
--- /dev/null
+++ b/storage/innobase/include/page0zip.ic
@@ -0,0 +1,456 @@
+/*****************************************************************************
+
+Copyright (c) 2005, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/page0zip.ic
+Compressed page interface
+
+Created June 2005 by Marko Makela
+*******************************************************/
+
+#ifdef UNIV_MATERIALIZE
+# undef UNIV_INLINE
+# define UNIV_INLINE
+#endif
+
+#include "page0zip.h"
+#include "mtr0log.h"
+#include "page0page.h"
+
+/* The format of compressed pages is as follows.
+
+The header and trailer of the uncompressed pages, excluding the page
+directory in the trailer, are copied as is to the header and trailer
+of the compressed page.
+
+At the end of the compressed page, there is a dense page directory
+pointing to every user record contained on the page, including deleted
+records on the free list.  The dense directory is indexed in the
+collation order, i.e., in the order in which the record list is
+linked on the uncompressed page.  The infimum and supremum records are
+excluded.  The two most significant bits of the entries are allocated
+for the delete-mark and an n_owned flag indicating the last record in
+a chain of records pointed to from the sparse page directory on the
+uncompressed page.
+
+The data between PAGE_ZIP_START and the last page directory entry will
+be written in compressed format, starting at offset PAGE_DATA.
+Infimum and supremum records are not stored.  We exclude the
+REC_N_NEW_EXTRA_BYTES in every record header.  These can be recovered
+from the dense page directory stored at the end of the compressed
+page.
+
+The fields node_ptr (in non-leaf B-tree nodes; level>0), trx_id and
+roll_ptr (in leaf B-tree nodes; level=0), and BLOB pointers of
+externally stored columns are stored separately, in ascending order of
+heap_no and column index, starting backwards from the dense page
+directory.
+
+The compressed data stream may be followed by a modification log
+covering the compressed portion of the page, as follows.
+
+MODIFICATION LOG ENTRY FORMAT
+- write record:
+  - (heap_no - 1) << 1 (1..2 bytes)
+  - extra bytes backwards
+  - data bytes
+- clear record:
+  - (heap_no - 1) << 1 | 1 (1..2 bytes)
+
+The integer values are stored in a variable-length format:
+- 0xxxxxxx: 0..127
+- 1xxxxxxx xxxxxxxx: 0..32767
+
+The end of the modification log is marked by a 0 byte.
+
+In summary, the compressed page looks like this:
+
+(1) Uncompressed page header (PAGE_DATA bytes)
+(2) Compressed index information
+(3) Compressed page data
+(4) Page modification log (page_zip->m_start..page_zip->m_end)
+(5) Empty zero-filled space
+(6) BLOB pointers (on leaf pages)
+  - BTR_EXTERN_FIELD_REF_SIZE for each externally stored column
+  - in descending collation order
+(7) Uncompressed columns of user records, n_dense * uncompressed_size bytes,
+  - indexed by heap_no
+  - DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN for leaf pages of clustered indexes
+  - REC_NODE_PTR_SIZE for non-leaf pages
+  - 0 otherwise
+(8) dense page directory, stored backwards
+  - n_dense = n_heap - 2
+  - existing records in ascending collation order
+  - deleted records (free list) in link order
+*/
+
+/** Start offset of the area that will be compressed */
+#define PAGE_ZIP_START		PAGE_NEW_SUPREMUM_END
+/** Size of a compressed page directory entry */
+#define PAGE_ZIP_DIR_SLOT_SIZE	2
+/** Mask of record offsets */
+#define PAGE_ZIP_DIR_SLOT_MASK	0x3fff
+/** 'owned' flag */
+#define PAGE_ZIP_DIR_SLOT_OWNED	0x4000
+/** 'deleted' flag */
+#define PAGE_ZIP_DIR_SLOT_DEL	0x8000
+
+/**********************************************************************//**
+Decode the shift-encoded size of a compressed page into bytes.
+@return	size in bytes, or 0 if page_zip->ssize is 0 */
+UNIV_INLINE
+ulint
+page_zip_get_size(
+/*==============*/
+	const page_zip_des_t*	page_zip)	/*!< in: compressed page */
+{
+	ulint	n_bytes = 0;
+
+	if (page_zip->ssize) {
+		/* ssize is a shift count applied to half of the
+		smallest compressed page size. */
+		n_bytes = (UNIV_ZIP_SIZE_MIN >> 1) << page_zip->ssize;
+
+		ut_ad(n_bytes >= UNIV_ZIP_SIZE_MIN);
+		ut_ad(n_bytes <= UNIV_PAGE_SIZE);
+	}
+
+	return(n_bytes);
+}
+/**********************************************************************//**
+Store the size of a compressed page, given in bytes, as a shift count
+in page_zip->ssize.  A size of 0 is stored as ssize 0. */
+UNIV_INLINE
+void
+page_zip_set_size(
+/*==============*/
+	page_zip_des_t*	page_zip,	/*!< in/out: compressed page */
+	ulint		size)		/*!< in: size in bytes */
+{
+	int	shift = 0;
+
+	if (size) {
+		ut_ad(ut_is_2pow(size));
+
+		/* Find the smallest shift >= 1 with 512 << shift >= size. */
+		shift = 1;
+
+		while (size > (ulint) (512 << shift)) {
+			shift++;
+		}
+	}
+
+	page_zip->ssize = shift;
+
+	/* The encoding must round-trip through page_zip_get_size(). */
+	ut_ad(page_zip_get_size(page_zip) == size);
+}
+
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Determine if a record is so big that it needs to be stored externally.
+@return	FALSE if the entire record can be stored locally on the page */
+UNIV_INLINE
+ibool
+page_zip_rec_needs_ext(
+/*===================*/
+	ulint	rec_size,	/*!< in: length of the record in bytes */
+	ulint	comp,		/*!< in: nonzero=compact format */
+	ulint	n_fields,	/*!< in: number of fields in the record;
+				ignored if zip_size == 0 */
+	ulint	zip_size)	/*!< in: compressed page size in bytes, or 0 */
+{
+	/* The record must be longer than its fixed-size header.  The
+	conditional has to be parenthesized: relational operators bind
+	tighter than ?:, so without the parentheses the expression
+	would be (rec_size > comp) ? ... : ..., which yields one of the
+	two header-size constants instead of a comparison result. */
+	ut_ad(rec_size
+	      > (ulint) (comp
+			 ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES));
+	ut_ad(ut_is_2pow(zip_size));
+	/* Compressed pages are only used with the compact format. */
+	ut_ad(comp || !zip_size);
+
+#if UNIV_PAGE_SIZE_MAX > REC_MAX_DATA_SIZE
+	if (rec_size >= REC_MAX_DATA_SIZE) {
+		return(TRUE);
+	}
+#endif
+
+	if (zip_size) {
+		ut_ad(comp);
+		/* On a compressed page, there is a two-byte entry in
+		the dense page directory for every record.  But there
+		is no record header.  There should be enough room for
+		one record on an empty leaf page.  Subtract 1 byte for
+		the encoded heap number.  Check also the available space
+		on the uncompressed page. */
+		return(rec_size - (REC_N_NEW_EXTRA_BYTES - 2 - 1)
+		       >= page_zip_empty_size(n_fields, zip_size)
+		       || rec_size >= page_get_free_space_of_empty(TRUE) / 2);
+	}
+
+	/* Uncompressed page: the record may take at most half of the
+	free space of an empty page. */
+	return(rec_size >= page_get_free_space_of_empty(comp) / 2);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+#ifdef UNIV_DEBUG
+/**********************************************************************//**
+Run basic sanity assertions on a compressed page descriptor.
+@return	TRUE if ok (a violated check fails an assertion instead) */
+UNIV_INLINE
+ibool
+page_zip_simple_validate(
+/*=====================*/
+	const page_zip_des_t*	page_zip)/*!< in: compressed page descriptor */
+{
+	ulint	zip_size;
+
+	/* The descriptor and its data buffer must exist, and the
+	encoded size must be in range, before the size is decoded. */
+	ut_ad(page_zip);
+	ut_ad(page_zip->data);
+	ut_ad(page_zip->ssize <= PAGE_ZIP_SSIZE_MAX);
+
+	zip_size = page_zip_get_size(page_zip);
+
+	ut_ad(zip_size > PAGE_DATA + PAGE_ZIP_DIR_SLOT_SIZE);
+	ut_ad(page_zip->m_start <= page_zip->m_end);
+	ut_ad(page_zip->m_end < zip_size);
+	ut_ad(page_zip->n_blobs < zip_size / BTR_EXTERN_FIELD_REF_SIZE);
+
+	return(TRUE);
+}
+#endif /* UNIV_DEBUG */
+
+/**********************************************************************//**
+Determine the length of the page trailer.
+@return length of the page trailer, in bytes, not including the
+terminating zero byte of the modification log */
+UNIV_INLINE
+ulint
+page_zip_get_trailer_len(
+/*=====================*/
+	const page_zip_des_t*	page_zip,/*!< in: compressed page */
+	ibool			is_clust)/*!< in: TRUE if clustered index */
+{
+	/* Trailer bytes used per record: one dense directory slot plus
+	the columns that are stored uncompressed for this page type. */
+	ulint	uncompressed_size;
+
+	ut_ad(page_zip_simple_validate(page_zip));
+	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+
+	if (!page_is_leaf(page_zip->data)) {
+		/* Non-leaf page: node pointer per record, no BLOBs. */
+		uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE
+			+ REC_NODE_PTR_SIZE;
+		ut_ad(!page_zip->n_blobs);
+	} else if (is_clust) {
+		/* Clustered index leaf page: trx_id and roll_ptr per
+		record; BLOB pointers are counted separately below. */
+		uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE
+			+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
+	} else {
+		/* Other leaf pages: only the directory slot, no BLOBs. */
+		uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE;
+		ut_ad(!page_zip->n_blobs);
+	}
+
+	/* n_heap - 2 is the number of dense directory entries. */
+	return((page_dir_get_n_heap(page_zip->data) - 2)
+	       * uncompressed_size
+	       + page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE);
+}
+
+/**********************************************************************//**
+Determine how big a record can be inserted without recompressing the page.
+@return a positive number indicating the maximum size of a record
+whose insertion is guaranteed to succeed, or zero or negative */
+UNIV_INLINE
+lint
+page_zip_max_ins_size(
+/*==================*/
+	const page_zip_des_t*	page_zip,/*!< in: compressed page */
+	ibool			is_clust)/*!< in: TRUE if clustered index */
+{
+	ulint	reserved;
+
+	/* Start from the current trailer and add one dense directory
+	slot, because creating a record may add a pointer to the dense
+	directory.  Space for the columns that will not be compressed
+	is also allocated from the trailer.  The BLOB pointers are
+	allocated from there as well, but they may as well be counted
+	in the length of the record. */
+	reserved = page_zip_get_trailer_len(page_zip, is_clust)
+		+ PAGE_ZIP_DIR_SLOT_SIZE;
+
+	return((lint) page_zip_get_size(page_zip)
+	       - reserved
+	       - page_zip->m_end
+	       - (REC_N_NEW_EXTRA_BYTES - 2));
+}
+
+/**********************************************************************//**
+Check whether the modification log has room for a record.
+@return	TRUE if enough space is available */
+UNIV_INLINE
+ibool
+page_zip_available(
+/*===============*/
+	const page_zip_des_t*	page_zip,/*!< in: compressed page */
+	ibool			is_clust,/*!< in: TRUE if clustered index */
+	ulint			length,	/*!< in: combined size of the record */
+	ulint			create)	/*!< in: nonzero=add the record to
+					the heap */
+{
+	ulint	needed;
+
+	ut_ad(length > REC_N_NEW_EXTRA_BYTES);
+
+	needed = page_zip_get_trailer_len(page_zip, is_clust);
+
+	if (create) {
+		/* Creating a record may add a pointer to the dense
+		directory.  Space for the columns that will not be
+		compressed also comes from the page trailer, as do
+		the BLOB pointers, but those may as well be counted
+		in the length of the record. */
+		needed += PAGE_ZIP_DIR_SLOT_SIZE;
+	}
+
+	/* Drop the fixed extra bytes from the record length and add
+	the maximum space needed for identifying the record
+	(encoded heap_no). */
+	needed += length - (REC_N_NEW_EXTRA_BYTES - 2);
+
+	/* Everything up to m_end is already occupied. */
+	needed += page_zip->m_end;
+
+	return(needed < page_zip_get_size(page_zip));
+}
+
+/**********************************************************************//**
+Initialize a compressed page descriptor by zero-filling it. */
+UNIV_INLINE
+void
+page_zip_des_init(
+/*==============*/
+	page_zip_des_t*	page_zip)	/*!< in/out: compressed page
+					descriptor */
+{
+	/* Every member of the descriptor starts out as all-zero bytes. */
+	memset(page_zip, 0, sizeof(*page_zip));
+}
+
+/**********************************************************************//**
+Write a log record of writing to the uncompressed header portion of a page. */
+UNIV_INTERN
+void
+page_zip_write_header_log(
+/*======================*/
+	const byte*	data,/*!< in: data on the uncompressed page */
+	ulint		length,	/*!< in: length of the data */
+	mtr_t*		mtr);	/*!< in: mini-transaction */
+
+/**********************************************************************//**
+Copy bytes from the header of the uncompressed page to the same offset
+in the compressed page, optionally writing a redo log record.  The data
+must already have been written to the uncompressed page.
+However, the data portion of the uncompressed page may differ from
+the compressed page when a record is being inserted in
+page_cur_insert_rec_zip(). */
+UNIV_INLINE
+void
+page_zip_write_header(
+/*==================*/
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
+	const byte*	str,	/*!< in: address on the uncompressed page */
+	ulint		length,	/*!< in: length of the data */
+	mtr_t*		mtr)	/*!< in: mini-transaction, or NULL */
+{
+	ulint	offset;
+
+	ut_ad(PAGE_ZIP_MATCH(str, page_zip));
+	ut_ad(page_zip_simple_validate(page_zip));
+	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
+
+	/* The offset of str within its page is also the destination
+	offset in the compressed page; it must be in the header. */
+	offset = page_offset(str);
+
+	ut_ad(offset < PAGE_DATA);
+
+	memcpy(page_zip->data + offset, str, length);
+
+	/* A full validation is not possible here; the following
+	would fail in page_cur_insert_rec_zip(). */
+	/* ut_ad(page_zip_validate(page_zip, str - offset)); */
+
+	if (mtr != NULL) {
+#ifndef UNIV_HOTBACKUP
+		page_zip_write_header_log(str, length, mtr);
+#endif /* !UNIV_HOTBACKUP */
+	}
+}
+
+/**********************************************************************//**
+Write a MLOG_ZIP_PAGE_COMPRESS_NO_DATA log record for an index page.
+The record carries the compression level in one byte and none of the
+page data. */
+UNIV_INLINE
+void
+page_zip_compress_write_log_no_data(
+/*================================*/
+	ulint		level,	/*!< in: compression level */
+	const page_t*	page,	/*!< in: page that is compressed */
+	dict_index_t*	index,	/*!< in: index */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	byte*	log_ptr;
+
+	/* Request one byte of payload after the index information. */
+	log_ptr = mlog_open_and_write_index(
+		mtr, page, index, MLOG_ZIP_PAGE_COMPRESS_NO_DATA, 1);
+
+	if (!log_ptr) {
+		/* No log buffer was opened: nothing to write. */
+		return;
+	}
+
+	mach_write_to_1(log_ptr, level);
+	mlog_close(mtr, log_ptr + 1);
+}
+
+/**********************************************************************//**
+Parse a log record of compressing an index page without the data, and
+recompress the page if one is given.
+@return	end of log record or NULL */
+UNIV_INLINE
+byte*
+page_zip_parse_compress_no_data(
+/*============================*/
+	byte*		ptr,		/*!< in: buffer */
+	byte*		end_ptr,	/*!< in: buffer end */
+	page_t*		page,		/*!< in: uncompressed page */
+	page_zip_des_t*	page_zip,	/*!< out: compressed page */
+	dict_index_t*	index)		/*!< in: index */
+{
+	ulint	level;
+
+	if (ptr == end_ptr) {
+		/* The one-byte compression level is not in the buffer. */
+		return(NULL);
+	}
+
+	level = mach_read_from_1(ptr);
+
+	if (page != NULL) {
+		/* A compress log record is logged only if the
+		compression was successful, so a failure here means
+		that something is wrong.  Crash in this case. */
+		if (!page_zip_compress(page_zip, page, index, level, NULL)) {
+			ut_error;
+		}
+	}
+
+	/* The record body is the single level byte. */
+	return(ptr + 1);
+}
+
+/**********************************************************************//**
+Reset the counters used for filling
+INFORMATION_SCHEMA.innodb_cmp_per_index.
+All entries of page_zip_stat_per_index are removed while holding
+page_zip_stat_per_index_mutex. */
+UNIV_INLINE
+void
+page_zip_reset_stat_per_index()
+/*===========================*/
+{
+	mutex_enter(&page_zip_stat_per_index_mutex);
+
+	/* Erase the whole range [begin, end), leaving the container
+	empty. */
+	page_zip_stat_per_index.erase(
+		page_zip_stat_per_index.begin(),
+		page_zip_stat_per_index.end());
+
+	mutex_exit(&page_zip_stat_per_index_mutex);
+}
+
+#ifdef UNIV_MATERIALIZE
+# undef UNIV_INLINE
+# define UNIV_INLINE	UNIV_INLINE_ORIGINAL
+#endif
diff --git a/storage/innobase/include/pars0grm.h b/storage/innobase/include/pars0grm.h
new file mode 100644
index 00000000000..8e725fe9545
--- /dev/null
+++ b/storage/innobase/include/pars0grm.h
@@ -0,0 +1,261 @@
+/* A Bison parser, made by GNU Bison 2.3.  */
+
+/* Skeleton interface for Bison's Yacc-like parsers in C
+
+   Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006
+   Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor,
+   Boston, MA 02110-1301, USA.  */
+
+/* As a special exception, you may create a larger work that contains
+   part or all of the Bison parser skeleton and distribute that work
+   under terms of your choice, so long as that work isn't itself a
+   parser generator using the skeleton or a modified version thereof
+   as a parser skeleton.  Alternatively, if you modify or redistribute
+   the parser skeleton itself, you may (at your option) remove this
+   special exception, which will cause the skeleton and the resulting
+   Bison output files to be licensed under the GNU General Public
+   License without this special exception.
+
+   This special exception was added by the Free Software Foundation in
+   version 2.2 of Bison.  */
+
+/* Tokens.  */
+#ifndef YYTOKENTYPE
+# define YYTOKENTYPE
+   /* Put the tokens into the symbol table, so that GDB and other debuggers
+      know about them.  */
+   enum yytokentype {
+     PARS_INT_LIT = 258,
+     PARS_FLOAT_LIT = 259,
+     PARS_STR_LIT = 260,
+     PARS_FIXBINARY_LIT = 261,
+     PARS_BLOB_LIT = 262,
+     PARS_NULL_LIT = 263,
+     PARS_ID_TOKEN = 264,
+     PARS_AND_TOKEN = 265,
+     PARS_OR_TOKEN = 266,
+     PARS_NOT_TOKEN = 267,
+     PARS_GE_TOKEN = 268,
+     PARS_LE_TOKEN = 269,
+     PARS_NE_TOKEN = 270,
+     PARS_PROCEDURE_TOKEN = 271,
+     PARS_IN_TOKEN = 272,
+     PARS_OUT_TOKEN = 273,
+     PARS_BINARY_TOKEN = 274,
+     PARS_BLOB_TOKEN = 275,
+     PARS_INT_TOKEN = 276,
+     PARS_INTEGER_TOKEN = 277,
+     PARS_FLOAT_TOKEN = 278,
+     PARS_CHAR_TOKEN = 279,
+     PARS_IS_TOKEN = 280,
+     PARS_BEGIN_TOKEN = 281,
+     PARS_END_TOKEN = 282,
+     PARS_IF_TOKEN = 283,
+     PARS_THEN_TOKEN = 284,
+     PARS_ELSE_TOKEN = 285,
+     PARS_ELSIF_TOKEN = 286,
+     PARS_LOOP_TOKEN = 287,
+     PARS_WHILE_TOKEN = 288,
+     PARS_RETURN_TOKEN = 289,
+     PARS_SELECT_TOKEN = 290,
+     PARS_SUM_TOKEN = 291,
+     PARS_COUNT_TOKEN = 292,
+     PARS_DISTINCT_TOKEN = 293,
+     PARS_FROM_TOKEN = 294,
+     PARS_WHERE_TOKEN = 295,
+     PARS_FOR_TOKEN = 296,
+     PARS_DDOT_TOKEN = 297,
+     PARS_READ_TOKEN = 298,
+     PARS_ORDER_TOKEN = 299,
+     PARS_BY_TOKEN = 300,
+     PARS_ASC_TOKEN = 301,
+     PARS_DESC_TOKEN = 302,
+     PARS_INSERT_TOKEN = 303,
+     PARS_INTO_TOKEN = 304,
+     PARS_VALUES_TOKEN = 305,
+     PARS_UPDATE_TOKEN = 306,
+     PARS_SET_TOKEN = 307,
+     PARS_DELETE_TOKEN = 308,
+     PARS_CURRENT_TOKEN = 309,
+     PARS_OF_TOKEN = 310,
+     PARS_CREATE_TOKEN = 311,
+     PARS_TABLE_TOKEN = 312,
+     PARS_INDEX_TOKEN = 313,
+     PARS_UNIQUE_TOKEN = 314,
+     PARS_CLUSTERED_TOKEN = 315,
+     PARS_DOES_NOT_FIT_IN_MEM_TOKEN = 316,
+     PARS_ON_TOKEN = 317,
+     PARS_ASSIGN_TOKEN = 318,
+     PARS_DECLARE_TOKEN = 319,
+     PARS_CURSOR_TOKEN = 320,
+     PARS_SQL_TOKEN = 321,
+     PARS_OPEN_TOKEN = 322,
+     PARS_FETCH_TOKEN = 323,
+     PARS_CLOSE_TOKEN = 324,
+     PARS_NOTFOUND_TOKEN = 325,
+     PARS_TO_CHAR_TOKEN = 326,
+     PARS_TO_NUMBER_TOKEN = 327,
+     PARS_TO_BINARY_TOKEN = 328,
+     PARS_BINARY_TO_NUMBER_TOKEN = 329,
+     PARS_SUBSTR_TOKEN = 330,
+     PARS_REPLSTR_TOKEN = 331,
+     PARS_CONCAT_TOKEN = 332,
+     PARS_INSTR_TOKEN = 333,
+     PARS_LENGTH_TOKEN = 334,
+     PARS_SYSDATE_TOKEN = 335,
+     PARS_PRINTF_TOKEN = 336,
+     PARS_ASSERT_TOKEN = 337,
+     PARS_RND_TOKEN = 338,
+     PARS_RND_STR_TOKEN = 339,
+     PARS_ROW_PRINTF_TOKEN = 340,
+     PARS_COMMIT_TOKEN = 341,
+     PARS_ROLLBACK_TOKEN = 342,
+     PARS_WORK_TOKEN = 343,
+     PARS_UNSIGNED_TOKEN = 344,
+     PARS_EXIT_TOKEN = 345,
+     PARS_FUNCTION_TOKEN = 346,
+     PARS_LOCK_TOKEN = 347,
+     PARS_SHARE_TOKEN = 348,
+     PARS_MODE_TOKEN = 349,
+     PARS_LIKE_TOKEN = 350,
+     PARS_LIKE_TOKEN_EXACT = 351,
+     PARS_LIKE_TOKEN_PREFIX = 352,
+     PARS_LIKE_TOKEN_SUFFIX = 353,
+     PARS_LIKE_TOKEN_SUBSTR = 354,
+     PARS_TABLE_NAME_TOKEN = 355,
+     PARS_COMPACT_TOKEN = 356,
+     PARS_BLOCK_SIZE_TOKEN = 357,
+     PARS_BIGINT_TOKEN = 358,
+     NEG = 359
+   };
+#endif
+/* Tokens.  */
+#define PARS_INT_LIT 258
+#define PARS_FLOAT_LIT 259
+#define PARS_STR_LIT 260
+#define PARS_FIXBINARY_LIT 261
+#define PARS_BLOB_LIT 262
+#define PARS_NULL_LIT 263
+#define PARS_ID_TOKEN 264
+#define PARS_AND_TOKEN 265
+#define PARS_OR_TOKEN 266
+#define PARS_NOT_TOKEN 267
+#define PARS_GE_TOKEN 268
+#define PARS_LE_TOKEN 269
+#define PARS_NE_TOKEN 270
+#define PARS_PROCEDURE_TOKEN 271
+#define PARS_IN_TOKEN 272
+#define PARS_OUT_TOKEN 273
+#define PARS_BINARY_TOKEN 274
+#define PARS_BLOB_TOKEN 275
+#define PARS_INT_TOKEN 276
+#define PARS_INTEGER_TOKEN 277
+#define PARS_FLOAT_TOKEN 278
+#define PARS_CHAR_TOKEN 279
+#define PARS_IS_TOKEN 280
+#define PARS_BEGIN_TOKEN 281
+#define PARS_END_TOKEN 282
+#define PARS_IF_TOKEN 283
+#define PARS_THEN_TOKEN 284
+#define PARS_ELSE_TOKEN 285
+#define PARS_ELSIF_TOKEN 286
+#define PARS_LOOP_TOKEN 287
+#define PARS_WHILE_TOKEN 288
+#define PARS_RETURN_TOKEN 289
+#define PARS_SELECT_TOKEN 290
+#define PARS_SUM_TOKEN 291
+#define PARS_COUNT_TOKEN 292
+#define PARS_DISTINCT_TOKEN 293
+#define PARS_FROM_TOKEN 294
+#define PARS_WHERE_TOKEN 295
+#define PARS_FOR_TOKEN 296
+#define PARS_DDOT_TOKEN 297
+#define PARS_READ_TOKEN 298
+#define PARS_ORDER_TOKEN 299
+#define PARS_BY_TOKEN 300
+#define PARS_ASC_TOKEN 301
+#define PARS_DESC_TOKEN 302
+#define PARS_INSERT_TOKEN 303
+#define PARS_INTO_TOKEN 304
+#define PARS_VALUES_TOKEN 305
+#define PARS_UPDATE_TOKEN 306
+#define PARS_SET_TOKEN 307
+#define PARS_DELETE_TOKEN 308
+#define PARS_CURRENT_TOKEN 309
+#define PARS_OF_TOKEN 310
+#define PARS_CREATE_TOKEN 311
+#define PARS_TABLE_TOKEN 312
+#define PARS_INDEX_TOKEN 313
+#define PARS_UNIQUE_TOKEN 314
+#define PARS_CLUSTERED_TOKEN 315
+#define PARS_DOES_NOT_FIT_IN_MEM_TOKEN 316
+#define PARS_ON_TOKEN 317
+#define PARS_ASSIGN_TOKEN 318
+#define PARS_DECLARE_TOKEN 319
+#define PARS_CURSOR_TOKEN 320
+#define PARS_SQL_TOKEN 321
+#define PARS_OPEN_TOKEN 322
+#define PARS_FETCH_TOKEN 323
+#define PARS_CLOSE_TOKEN 324
+#define PARS_NOTFOUND_TOKEN 325
+#define PARS_TO_CHAR_TOKEN 326
+#define PARS_TO_NUMBER_TOKEN 327
+#define PARS_TO_BINARY_TOKEN 328
+#define PARS_BINARY_TO_NUMBER_TOKEN 329
+#define PARS_SUBSTR_TOKEN 330
+#define PARS_REPLSTR_TOKEN 331
+#define PARS_CONCAT_TOKEN 332
+#define PARS_INSTR_TOKEN 333
+#define PARS_LENGTH_TOKEN 334
+#define PARS_SYSDATE_TOKEN 335
+#define PARS_PRINTF_TOKEN 336
+#define PARS_ASSERT_TOKEN 337
+#define PARS_RND_TOKEN 338
+#define PARS_RND_STR_TOKEN 339
+#define PARS_ROW_PRINTF_TOKEN 340
+#define PARS_COMMIT_TOKEN 341
+#define PARS_ROLLBACK_TOKEN 342
+#define PARS_WORK_TOKEN 343
+#define PARS_UNSIGNED_TOKEN 344
+#define PARS_EXIT_TOKEN 345
+#define PARS_FUNCTION_TOKEN 346
+#define PARS_LOCK_TOKEN 347
+#define PARS_SHARE_TOKEN 348
+#define PARS_MODE_TOKEN 349
+#define PARS_LIKE_TOKEN 350
+#define PARS_LIKE_TOKEN_EXACT 351
+#define PARS_LIKE_TOKEN_PREFIX 352
+#define PARS_LIKE_TOKEN_SUFFIX 353
+#define PARS_LIKE_TOKEN_SUBSTR 354
+#define PARS_TABLE_NAME_TOKEN 355
+#define PARS_COMPACT_TOKEN 356
+#define PARS_BLOCK_SIZE_TOKEN 357
+#define PARS_BIGINT_TOKEN 358
+#define NEG 359
+
+
+
+
+#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
+typedef int YYSTYPE;
+# define yystype YYSTYPE /* obsolescent; will be withdrawn */
+# define YYSTYPE_IS_DECLARED 1
+# define YYSTYPE_IS_TRIVIAL 1
+#endif
+
+extern YYSTYPE yylval;
+
diff --git a/storage/innobase/include/pars0opt.h b/storage/innobase/include/pars0opt.h
new file mode 100644
index 00000000000..1084d644c90
--- /dev/null
+++ b/storage/innobase/include/pars0opt.h
@@ -0,0 +1,75 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/pars0opt.h
+Simple SQL optimizer
+
+Created 12/21/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef pars0opt_h
+#define pars0opt_h
+
+#include "univ.i"
+#include "que0types.h"
+#include "usr0types.h"
+#include "pars0sym.h"
+#include "dict0types.h"
+#include "row0sel.h"
+
+/*******************************************************************//**
+Optimizes a select. Decides which index to use for each table. The tables
+are accessed in the order that they were written to the FROM part in the
+select statement. */
+UNIV_INTERN
+void
+opt_search_plan(
+/*============*/
+	sel_node_t*	sel_node);	/*!< in: parsed select node */
+/*******************************************************************//**
+Looks for occurrences of the columns of the table in the query subgraph and
+adds them to the list of columns if an occurrence of the same column does not
+already exist in the list. If the column is already in the list, puts a value
+indirection to point to the occurrence in the column list, except if the
+column occurrence we are looking at is in the column list, in which case
+nothing is done. */
+UNIV_INTERN
+void
+opt_find_all_cols(
+/*==============*/
+	ibool		copy_val,	/*!< in: if TRUE, new found columns are
+					added as columns to copy */
+	dict_index_t*	index,		/*!< in: index to use */
+	sym_node_list_t* col_list,	/*!< in: base node of a list where
+					to add new found columns */
+	plan_t*		plan,		/*!< in: plan or NULL */
+	que_node_t*	exp);		/*!< in: expression or condition */
+/********************************************************************//**
+Prints info of a query plan. */
+UNIV_INTERN
+void
+opt_print_query_plan(
+/*=================*/
+	sel_node_t*	sel_node);	/*!< in: select node */
+
+#ifndef UNIV_NONINL
+#include "pars0opt.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/pars0opt.ic b/storage/innobase/include/pars0opt.ic
new file mode 100644
index 00000000000..786d911ca3d
--- /dev/null
+++ b/storage/innobase/include/pars0opt.ic
@@ -0,0 +1,24 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/pars0opt.ic
+Simple SQL optimizer
+
+Created 12/21/1997 Heikki Tuuri
+*******************************************************/
diff --git a/storage/innobase/include/pars0pars.h b/storage/innobase/include/pars0pars.h
new file mode 100644
index 00000000000..65ff7533828
--- /dev/null
+++ b/storage/innobase/include/pars0pars.h
@@ -0,0 +1,826 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/pars0pars.h
+SQL parser
+
+Created 11/19/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef pars0pars_h
+#define pars0pars_h
+
+#include "univ.i"
+#include "que0types.h"
+#include "usr0types.h"
+#include "pars0types.h"
+#include "row0types.h"
+#include "trx0types.h"
+#include "ut0vec.h"
+
+/** Type of the user functions. The first argument is always InnoDB-supplied
+and varies in type, while 'user_arg' is a user-supplied argument. The
+meaning of the return type also varies. See the individual use cases, e.g.
+the FETCH statement, for details on them. */
+typedef ibool	(*pars_user_func_cb_t)(void* arg, void* user_arg);
+
+/** If the following is set TRUE, the parser will emit debugging
+information */
+extern int	yydebug;
+
+#ifdef UNIV_SQL_DEBUG
+/** If the following is set TRUE, the lexer will print the SQL string
+as it tokenizes it */
+extern ibool	pars_print_lexed;
+#endif /* UNIV_SQL_DEBUG */
+
+/* Global variable used while parsing a single procedure or query: the code
+is NOT re-entrant */
+extern sym_tab_t*	pars_sym_tab_global;
+
+extern pars_res_word_t	pars_to_char_token;
+extern pars_res_word_t	pars_to_number_token;
+extern pars_res_word_t	pars_to_binary_token;
+extern pars_res_word_t	pars_binary_to_number_token;
+extern pars_res_word_t	pars_substr_token;
+extern pars_res_word_t	pars_replstr_token;
+extern pars_res_word_t	pars_concat_token;
+extern pars_res_word_t	pars_length_token;
+extern pars_res_word_t	pars_instr_token;
+extern pars_res_word_t	pars_sysdate_token;
+extern pars_res_word_t	pars_printf_token;
+extern pars_res_word_t	pars_assert_token;
+extern pars_res_word_t	pars_rnd_token;
+extern pars_res_word_t	pars_rnd_str_token;
+extern pars_res_word_t	pars_count_token;
+extern pars_res_word_t	pars_sum_token;
+extern pars_res_word_t	pars_distinct_token;
+extern pars_res_word_t	pars_binary_token;
+extern pars_res_word_t	pars_blob_token;
+extern pars_res_word_t	pars_int_token;
+extern pars_res_word_t	pars_bigint_token;
+extern pars_res_word_t	pars_char_token;
+extern pars_res_word_t	pars_float_token;
+extern pars_res_word_t	pars_update_token;
+extern pars_res_word_t	pars_asc_token;
+extern pars_res_word_t	pars_desc_token;
+extern pars_res_word_t	pars_open_token;
+extern pars_res_word_t	pars_close_token;
+extern pars_res_word_t	pars_share_token;
+extern pars_res_word_t	pars_unique_token;
+extern pars_res_word_t	pars_clustered_token;
+
+extern ulint		pars_star_denoter;
+
+/* Procedure parameter types */
+#define PARS_INPUT	0
+#define PARS_OUTPUT	1
+#define PARS_NOT_PARAM	2
+
+int
+yyparse(void);
+
+/*************************************************************//**
+Parses an SQL string returning the query graph.
+@return	own: the query graph */
+UNIV_INTERN
+que_t*
+pars_sql(
+/*=====*/
+	pars_info_t*	info,	/*!< in: extra information, or NULL */
+	const char*	str);	/*!< in: SQL string */
+/*************************************************************//**
+Retrieves characters to the lexical analyzer.
+@return number of characters copied or 0 on EOF */
+UNIV_INTERN
+int
+pars_get_lex_chars(
+/*===============*/
+	char*	buf,		/*!< in/out: buffer where to copy */
+	int	max_size);	/*!< in: maximum number of characters which fit
+				in the buffer */
+/*************************************************************//**
+Called by yyparse on error. */
+UNIV_INTERN
+void
+yyerror(
+/*====*/
+	const char*	s);	/*!< in: error message string */
+/*********************************************************************//**
+Parses a variable declaration.
+@return	own: symbol table node of type SYM_VAR */
+UNIV_INTERN
+sym_node_t*
+pars_variable_declaration(
+/*======================*/
+	sym_node_t*	node,	/*!< in: symbol table node allocated for the
+				id of the variable */
+	pars_res_word_t* type);	/*!< in: pointer to a type token */
+/*********************************************************************//**
+Parses a function expression.
+@return	own: function node in a query tree */
+UNIV_INTERN
+func_node_t*
+pars_func(
+/*======*/
+	que_node_t*	res_word,/*!< in: function name reserved word */
+	que_node_t*	arg);	/*!< in: first argument in the argument list */
+/*********************************************************************//**
+Rebinds a LIKE search string. NOTE: any '%' characters embedded within
+the search string are ignored.
+@return	int code (presumably a PARS_LIKE_TOKEN_* value; TODO confirm) */
+UNIV_INTERN
+int
+pars_like_rebind(
+/*=============*/
+        sym_node_t*     node,   /*!< in: the search string node */
+        const byte*     ptr,    /*!< in: literal to (re)bind */
+        ulint           len);   /*!< in: length of literal to (re)bind */
+/*********************************************************************//**
+Parses an operator expression.
+@return	own: function node in a query tree */
+UNIV_INTERN
+func_node_t*
+pars_op(
+/*====*/
+	int		func,	/*!< in: operator token code */
+	que_node_t*	arg1,	/*!< in: first argument */
+	que_node_t*	arg2);	/*!< in: second argument or NULL for an unary
+				operator */
+/*********************************************************************//**
+Parses an ORDER BY clause. Only ordering by a single column is supported.
+@return	own: order-by node in a query tree */
+UNIV_INTERN
+order_node_t*
+pars_order_by(
+/*==========*/
+	sym_node_t*	column,	/*!< in: column name */
+	pars_res_word_t* asc);	/*!< in: &pars_asc_token or &pars_desc_token */
+/*********************************************************************//**
+Parses a select list; creates a query graph node for the whole SELECT
+statement.
+@return	own: select node in a query tree */
+UNIV_INTERN
+sel_node_t*
+pars_select_list(
+/*=============*/
+	que_node_t*	select_list,	/*!< in: select list */
+	sym_node_t*	into_list);	/*!< in: variables list or NULL */
+/*********************************************************************//**
+Parses a cursor declaration.
+@return	sym_node */
+UNIV_INTERN
+que_node_t*
+pars_cursor_declaration(
+/*====================*/
+	sym_node_t*	sym_node,	/*!< in: cursor id node in the symbol
+					table */
+	sel_node_t*	select_node);	/*!< in: select node */
+/*********************************************************************//**
+Parses a function declaration.
+@return	sym_node */
+UNIV_INTERN
+que_node_t*
+pars_function_declaration(
+/*======================*/
+	sym_node_t*	sym_node);	/*!< in: function id node in the symbol
+					table */
+/*********************************************************************//**
+Parses a select statement.
+@return	own: select node in a query tree */
+UNIV_INTERN
+sel_node_t*
+pars_select_statement(
+/*==================*/
+	sel_node_t*	select_node,	/*!< in: select node already containing
+					the select list */
+	sym_node_t*	table_list,	/*!< in: table list */
+	que_node_t*	search_cond,	/*!< in: search condition or NULL */
+	pars_res_word_t* for_update,	/*!< in: NULL or &pars_update_token */
+	pars_res_word_t* consistent_read,/*!< in: NULL or
+						&pars_consistent_token */
+	order_node_t*	order_by);	/*!< in: NULL or an order-by node */
+/*********************************************************************//**
+Parses a column assignment in an update.
+@return	column assignment node */
+UNIV_INTERN
+col_assign_node_t*
+pars_column_assignment(
+/*===================*/
+	sym_node_t*	column,	/*!< in: column to assign */
+	que_node_t*	exp);	/*!< in: value to assign */
+/*********************************************************************//**
+Parses a delete or update statement start.
+@return	own: update node in a query tree */
+UNIV_INTERN
+upd_node_t*
+pars_update_statement_start(
+/*========================*/
+	ibool		is_delete,	/*!< in: TRUE if delete */
+	sym_node_t*	table_sym,	/*!< in: table name node */
+	col_assign_node_t* col_assign_list);/*!< in: column assignment list, NULL
+					if delete */
+/*********************************************************************//**
+Parses an update or delete statement.
+@return	own: update node in a query tree */
+UNIV_INTERN
+upd_node_t*
+pars_update_statement(
+/*==================*/
+	upd_node_t*	node,		/*!< in: update node */
+	sym_node_t*	cursor_sym,	/*!< in: pointer to a cursor entry in
+					the symbol table or NULL */
+	que_node_t*	search_cond);	/*!< in: search condition or NULL */
+/*********************************************************************//**
+Parses an insert statement.
+@return	own: insert node in a query tree */
+UNIV_INTERN
+ins_node_t*
+pars_insert_statement(
+/*==================*/
+	sym_node_t*	table_sym,	/*!< in: table name node */
+	que_node_t*	values_list,	/*!< in: value expression list or NULL */
+	sel_node_t*	select);	/*!< in: select condition or NULL */
+/*********************************************************************//**
+Parses a procedure parameter declaration.
+@return	own: symbol table node of type SYM_VAR */
+UNIV_INTERN
+sym_node_t*
+pars_parameter_declaration(
+/*=======================*/
+	sym_node_t*	node,	/*!< in: symbol table node allocated for the
+				id of the parameter */
+	ulint		param_type,
+				/*!< in: PARS_INPUT or PARS_OUTPUT */
+	pars_res_word_t* type);	/*!< in: pointer to a type token */
+/*********************************************************************//**
+Parses an elsif element.
+@return	elsif node */
+UNIV_INTERN
+elsif_node_t*
+pars_elsif_element(
+/*===============*/
+	que_node_t*	cond,		/*!< in: if-condition */
+	que_node_t*	stat_list);	/*!< in: statement list */
+/*********************************************************************//**
+Parses an if-statement.
+@return	if-statement node */
+UNIV_INTERN
+if_node_t*
+pars_if_statement(
+/*==============*/
+	que_node_t*	cond,		/*!< in: if-condition */
+	que_node_t*	stat_list,	/*!< in: statement list */
+	que_node_t*	else_part);	/*!< in: else-part statement list */
+/*********************************************************************//**
+Parses a for-loop-statement.
+@return	for-statement node */
+UNIV_INTERN
+for_node_t*
+pars_for_statement(
+/*===============*/
+	sym_node_t*	loop_var,	/*!< in: loop variable */
+	que_node_t*	loop_start_limit,/*!< in: loop start expression */
+	que_node_t*	loop_end_limit,	/*!< in: loop end expression */
+	que_node_t*	stat_list);	/*!< in: statement list */
+/*********************************************************************//**
+Parses a while-statement.
+@return	while-statement node */
+UNIV_INTERN
+while_node_t*
+pars_while_statement(
+/*=================*/
+	que_node_t*	cond,		/*!< in: while-condition */
+	que_node_t*	stat_list);	/*!< in: statement list */
+/*********************************************************************//**
+Parses an exit statement.
+@return	exit statement node */
+UNIV_INTERN
+exit_node_t*
+pars_exit_statement(void);
+/*=====================*/
+/*********************************************************************//**
+Parses a return-statement.
+@return	return-statement node */
+UNIV_INTERN
+return_node_t*
+pars_return_statement(void);
+/*=======================*/
+/*********************************************************************//**
+Parses a procedure call.
+@return	function node */
+UNIV_INTERN
+func_node_t*
+pars_procedure_call(
+/*================*/
+	que_node_t*	res_word,/*!< in: procedure name reserved word */
+	que_node_t*	args);	/*!< in: argument list */
+/*********************************************************************//**
+Parses an assignment statement.
+@return	assignment statement node */
+UNIV_INTERN
+assign_node_t*
+pars_assignment_statement(
+/*======================*/
+	sym_node_t*	var,	/*!< in: variable to assign */
+	que_node_t*	val);	/*!< in: value to assign */
+/*********************************************************************//**
+Parses a fetch statement. into_list or user_func (but not both) must be
+non-NULL.
+@return	fetch statement node */
+UNIV_INTERN
+fetch_node_t*
+pars_fetch_statement(
+/*=================*/
+	sym_node_t*	cursor,		/*!< in: cursor node */
+	sym_node_t*	into_list,	/*!< in: variables to set, or NULL */
+	sym_node_t*	user_func);	/*!< in: user function name, or NULL */
+/*********************************************************************//**
+Parses an open or close cursor statement.
+@return	open statement node */
+UNIV_INTERN
+open_node_t*
+pars_open_statement(
+/*================*/
+	ulint		type,	/*!< in: ROW_SEL_OPEN_CURSOR
+				or ROW_SEL_CLOSE_CURSOR */
+	sym_node_t*	cursor);	/*!< in: cursor node */
+/*********************************************************************//**
+Parses a row_printf-statement.
+@return	row_printf-statement node */
+UNIV_INTERN
+row_printf_node_t*
+pars_row_printf_statement(
+/*======================*/
+	sel_node_t*	sel_node);	/*!< in: select node */
+/*********************************************************************//**
+Parses a commit statement.
+@return	own: commit node struct */
+UNIV_INTERN
+commit_node_t*
+pars_commit_statement(void);
+/*=======================*/
+/*********************************************************************//**
+Parses a rollback statement.
+@return	own: rollback node struct */
+UNIV_INTERN
+roll_node_t*
+pars_rollback_statement(void);
+/*=========================*/
+/*********************************************************************//**
+Parses a column definition at a table creation.
+@return	column sym table node */
+UNIV_INTERN
+sym_node_t*
+pars_column_def(
+/*============*/
+	sym_node_t*		sym_node,	/*!< in: column node in the
+						symbol table */
+	pars_res_word_t*	type,		/*!< in: data type */
+	sym_node_t*		len,		/*!< in: length of column, or
+						NULL */
+	void*			is_unsigned,	/*!< in: if not NULL, column
+						is of type UNSIGNED. */
+	void*			is_not_null);	/*!< in: if not NULL, column
+						is of type NOT NULL. */
+/*********************************************************************//**
+Parses a table creation operation.
+@return	table create subgraph */
+UNIV_INTERN
+tab_node_t*
+pars_create_table(
+/*==============*/
+	sym_node_t*	table_sym,	/*!< in: table name node in the symbol
+					table */
+	sym_node_t*	column_defs,	/*!< in: list of column names */
+	sym_node_t*	compact,	/*!< in: non-NULL if COMPACT table */
+	sym_node_t*	block_size,	/*!< in: block size (can be NULL) */
+	void*		not_fit_in_memory);
+					/*!< in: a non-NULL pointer means that
+					this is a table which in simulations
+					should be simulated as not fitting
+					in memory; thread is put to sleep
+					to simulate disk accesses; NOTE that
+					this flag is not stored to the data
+					dictionary on disk, and the database
+					will forget about non-NULL value if
+					it has to reload the table definition
+					from disk */
+/*********************************************************************//**
+Parses an index creation operation.
+@return	index create subgraph */
+UNIV_INTERN
+ind_node_t*
+pars_create_index(
+/*==============*/
+	pars_res_word_t* unique_def,	/*!< in: not NULL if a unique index */
+	pars_res_word_t* clustered_def,	/*!< in: not NULL if a clustered index */
+	sym_node_t*	index_sym,	/*!< in: index name node in the symbol
+					table */
+	sym_node_t*	table_sym,	/*!< in: table name node in the symbol
+					table */
+	sym_node_t*	column_list);	/*!< in: list of column names */
+/*********************************************************************//**
+Parses a procedure definition.
+@return	query fork node */
+UNIV_INTERN
+que_fork_t*
+pars_procedure_definition(
+/*======================*/
+	sym_node_t*	sym_node,	/*!< in: procedure id node in the symbol
+					table */
+	sym_node_t*	param_list,	/*!< in: parameter declaration list */
+	que_node_t*	stat_list);	/*!< in: statement list */
+
+/*************************************************************//**
+Parses a stored procedure call, when this is not within another stored
+procedure, that is, the client issues a procedure call directly.
+In MySQL/InnoDB, stored InnoDB procedures are invoked via the
+parsed procedure tree, not via InnoDB SQL, so this function is not used.
+@return	query graph */
+UNIV_INTERN
+que_fork_t*
+pars_stored_procedure_call(
+/*=======================*/
+	sym_node_t*	sym_node);	/*!< in: stored procedure name */
+/******************************************************************//**
+Completes a query graph by adding query thread and fork nodes
+above it and prepares the graph for running. The fork created is of
+type QUE_FORK_MYSQL_INTERFACE.
+@return	query thread node to run */
+UNIV_INTERN
+que_thr_t*
+pars_complete_graph_for_exec(
+/*=========================*/
+	que_node_t*	node,	/*!< in: root node for an incomplete
+				query graph, or NULL for dummy graph */
+	trx_t*		trx,	/*!< in: transaction handle */
+	mem_heap_t*	heap)	/*!< in: memory heap from which allocated */
+	__attribute__((nonnull(2,3), warn_unused_result));
+
+/****************************************************************//**
+Create parser info struct.
+@return	own: info struct */
+UNIV_INTERN
+pars_info_t*
+pars_info_create(void);
+/*==================*/
+
+/****************************************************************//**
+Free info struct and everything it contains. */
+UNIV_INTERN
+void
+pars_info_free(
+/*===========*/
+	pars_info_t*	info);	/*!< in, own: info struct */
+
+/****************************************************************//**
+Add bound literal. */
+UNIV_INTERN
+void
+pars_info_add_literal(
+/*==================*/
+	pars_info_t*	info,		/*!< in: info struct */
+	const char*	name,		/*!< in: name */
+	const void*	address,	/*!< in: address */
+	ulint		length,		/*!< in: length of data */
+	ulint		type,		/*!< in: type, e.g. DATA_FIXBINARY */
+	ulint		prtype);	/*!< in: precise type, e.g.
+					DATA_UNSIGNED */
+
+/****************************************************************//**
+Equivalent to pars_info_add_literal(info, name, str, strlen(str),
+DATA_VARCHAR, DATA_ENGLISH). */
+UNIV_INTERN
+void
+pars_info_add_str_literal(
+/*======================*/
+	pars_info_t*	info,		/*!< in: info struct */
+	const char*	name,		/*!< in: name */
+	const char*	str);		/*!< in: string */
+/****************************************************************//**
+Binds a literal: if the literal already exists it is rebound, otherwise
+a new entry is created. */
+UNIV_INTERN
+void
+pars_info_bind_literal(
+/*===================*/
+	pars_info_t*	info,		/*!< in: info struct */
+	const char*	name,		/*!< in: name */
+	const void*	address,	/*!< in: address */
+	ulint		length,		/*!< in: length of data */
+	ulint		type,		/*!< in: type, e.g. DATA_FIXBINARY */
+	ulint		prtype);	/*!< in: prtype, e.g. DATA_UNSIGNED */
+/********************************************************************
+If the literal value already exists then it rebinds otherwise it
+creates a new entry.*/
+UNIV_INTERN
+void
+pars_info_bind_varchar_literal(
+/*===========================*/
+	pars_info_t*	info,		/*!< in: info struct */
+	const char*	name,		/*!< in: name */
+	const byte*	str,		/*!< in: string */
+	ulint		str_len);	/*!< in: string length */
+/****************************************************************//**
+If the literal value already exists then it rebinds otherwise it
+creates a new entry.
+
+Unlike pars_info_add_int4_literal(), this takes a pointer to the value
+rather than the value itself.
+NOTE(review): presumably the pointed-to value is bound by address and
+must outlive the binding, as no copy into the info struct's heap is
+documented here -- confirm against the definition. */
+UNIV_INTERN
+void
+pars_info_bind_int4_literal(
+/*=======================*/
+	pars_info_t*		info,		/*!< in: info struct */
+	const char*		name,		/*!< in: name */
+	const ib_uint32_t*	val);		/*!< in: pointer to value */
+/********************************************************************
+If the literal value already exists then it rebinds otherwise it
+creates a new entry. */
+UNIV_INTERN
+void
+pars_info_bind_int8_literal(
+/*=======================*/
+	pars_info_t*		info,		/*!< in: info struct */
+	const char*		name,		/*!< in: name */
+	const ib_uint64_t*	val);		/*!< in: value */
+/****************************************************************//**
+Add user function. */
+UNIV_INTERN
+void
+pars_info_bind_function(
+/*===================*/
+	pars_info_t*		info,	/*!< in: info struct */
+	const char*		name,	/*!< in: function name */
+	pars_user_func_cb_t	func,	/*!< in: function address */
+	void*			arg);	/*!< in: user-supplied argument */
+/****************************************************************//**
+Add bound id. */
+UNIV_INTERN
+void
+pars_info_bind_id(
+/*=============*/
+	pars_info_t*		info,	/*!< in: info struct */
+	ibool			copy_name,/*!< in: make a copy of name if TRUE */
+	const char*		name,	/*!< in: name */
+	const char*		id);	/*!< in: id */
+/****************************************************************//**
+Equivalent to:
+
+char buf[4];
+mach_write_to_4(buf, val);
+pars_info_add_literal(info, name, buf, 4, DATA_INT, 0);
+
+except that the buffer is dynamically allocated from the info struct's
+heap. */
+UNIV_INTERN
+void
+pars_info_add_int4_literal(
+/*=======================*/
+	pars_info_t*	info,		/*!< in: info struct */
+	const char*	name,		/*!< in: name */
+	lint		val);		/*!< in: value */
+
+/****************************************************************//**
+Equivalent to:
+
+char buf[8];
+mach_write_to_8(buf, val);
+pars_info_add_literal(info, name, buf, 8, DATA_FIXBINARY, 0);
+
+except that the buffer is dynamically allocated from the info struct's
+heap. */
+UNIV_INTERN
+void
+pars_info_add_ull_literal(
+/*======================*/
+	pars_info_t*	info,		/*!< in: info struct */
+	const char*	name,		/*!< in: name */
+	ib_uint64_t	val);		/*!< in: value */
+
+/****************************************************************//**
+If the literal value already exists then it rebinds otherwise it
+creates a new entry. */
+UNIV_INTERN
+void
+pars_info_bind_ull_literal(
+/*=======================*/
+	pars_info_t*		info,	/*!< in: info struct */
+	const char*		name,	/*!< in: name */
+	const ib_uint64_t*	val)	/*!< in: value */
+	__attribute__((nonnull));
+
+/****************************************************************//**
+Add bound id. */
+UNIV_INTERN
+void
+pars_info_add_id(
+/*=============*/
+	pars_info_t*	info,		/*!< in: info struct */
+	const char*	name,		/*!< in: name */
+	const char*	id);		/*!< in: id */
+
+/****************************************************************//**
+Get bound literal with the given name.
+@return	bound literal, or NULL if not found */
+UNIV_INTERN
+pars_bound_lit_t*
+pars_info_get_bound_lit(
+/*====================*/
+	pars_info_t*		info,	/*!< in: info struct */
+	const char*		name);	/*!< in: bound literal name to find */
+
+/****************************************************************//**
+Get bound id with the given name.
+@return	bound id, or NULL if not found */
+UNIV_INTERN
+pars_bound_id_t*
+pars_info_get_bound_id(
+/*===================*/
+	pars_info_t*		info,	/*!< in: info struct */
+	const char*		name);	/*!< in: bound id name to find */
+
+/******************************************************************//**
+Release any resources used by the lexer. */
+UNIV_INTERN
+void
+pars_lexer_close(void);
+/*==================*/
+
+/** Extra information supplied for pars_sql(). */
+struct pars_info_t {
+	mem_heap_t*	heap;		/*!< our own memory heap */
+
+	ib_vector_t*	funcs;		/*!< user functions, or NULL
+					(pars_user_func_t*) */
+	ib_vector_t*	bound_lits;	/*!< bound literals, or NULL
+					(pars_bound_lit_t*) */
+	ib_vector_t*	bound_ids;	/*!< bound ids, or NULL
+					(pars_bound_id_t*) */
+
+	ibool		graph_owns_us;	/*!< if TRUE (which is the default),
+					que_graph_free() will free us */
+};
+
+/** User-supplied function and argument. */
+struct pars_user_func_t {
+	const char*		name;	/*!< function name */
+	pars_user_func_cb_t	func;	/*!< function address */
+	void*			arg;	/*!< user-supplied argument */
+};
+
+/** Bound literal. */
+struct pars_bound_lit_t {
+	const char*	name;		/*!< name */
+	const void*	address;	/*!< address */
+	ulint		length;		/*!< length of data */
+	ulint		type;		/*!< type, e.g. DATA_FIXBINARY */
+	ulint		prtype;		/*!< precise type, e.g. DATA_UNSIGNED */
+	sym_node_t*	node;		/*!< symbol node */
+};
+
+/** Bound identifier. */
+struct pars_bound_id_t {
+	const char*	name;		/*!< name */
+	const char*	id;		/*!< identifier */
+};
+
+/** Struct used to denote a reserved word in a parsing tree */
+struct pars_res_word_t{
+	int	code;	/*!< the token code for the reserved word from
+			pars0grm.h */
+};
+
+/** A predefined function or operator node in a parsing tree; this construct
+is also used for some non-functions like the assignment ':=' */
+struct func_node_t{
+	que_common_t	common;	/*!< type: QUE_NODE_FUNC */
+	int		func;	/*!< token code of the function name */
+	ulint		fclass;	/*!< class of the function */
+	que_node_t*	args;	/*!< argument(s) of the function */
+	UT_LIST_NODE_T(func_node_t) cond_list;
+				/*!< list of comparison conditions; defined
+				only for comparison operator nodes except,
+				presently, for OPT_SCROLL_TYPE ones */
+	UT_LIST_NODE_T(func_node_t) func_node_list;
+				/*!< list of function nodes in a parsed
+				query graph */
+};
+
+/** An order-by node in a select */
+struct order_node_t{
+	que_common_t	common;	/*!< type: QUE_NODE_ORDER */
+	sym_node_t*	column;	/*!< order-by column */
+	ibool		asc;	/*!< TRUE if ascending, FALSE if descending */
+};
+
+/** Procedure definition node */
+struct proc_node_t{
+	que_common_t	common;		/*!< type: QUE_NODE_PROC */
+	sym_node_t*	proc_id;	/*!< procedure name symbol in the symbol
+					table of this same procedure */
+	sym_node_t*	param_list;	/*!< input and output parameters */
+	que_node_t*	stat_list;	/*!< statement list */
+	sym_tab_t*	sym_tab;	/*!< symbol table of this procedure */
+};
+
+/** elsif-element node */
+struct elsif_node_t{
+	que_common_t	common;		/*!< type: QUE_NODE_ELSIF */
+	que_node_t*	cond;		/*!< if condition */
+	que_node_t*	stat_list;	/*!< statement list */
+};
+
+/** if-statement node */
+struct if_node_t{
+	que_common_t	common;		/*!< type: QUE_NODE_IF */
+	que_node_t*	cond;		/*!< if condition */
+	que_node_t*	stat_list;	/*!< statement list */
+	que_node_t*	else_part;	/*!< else-part statement list */
+	elsif_node_t*	elsif_list;	/*!< elsif element list */
+};
+
+/** while-statement node */
+struct while_node_t{
+	que_common_t	common;		/*!< type: QUE_NODE_WHILE */
+	que_node_t*	cond;		/*!< while condition */
+	que_node_t*	stat_list;	/*!< statement list */
+};
+
+/** for-loop-statement node */
+struct for_node_t{
+	que_common_t	common;		/*!< type: QUE_NODE_FOR */
+	sym_node_t*	loop_var;	/*!< loop variable: this is the
+					dereferenced symbol from the
+					variable declarations, not the
+					symbol occurrence in the for loop
+					definition */
+	que_node_t*	loop_start_limit;/*!< initial value of loop variable */
+	que_node_t*	loop_end_limit;	/*!< end value of loop variable */
+	lint		loop_end_value;	/*!< evaluated value for the end value:
+					it is calculated only when the loop
+					is entered, and will not change within
+					the loop */
+	que_node_t*	stat_list;	/*!< statement list */
+};
+
+/** exit statement node */
+struct exit_node_t{
+	que_common_t	common;		/*!< type: QUE_NODE_EXIT */
+};
+
+/** return-statement node */
+struct return_node_t{
+	que_common_t	common;		/*!< type: QUE_NODE_RETURN */
+};
+
+/** Assignment statement node */
+struct assign_node_t{
+	que_common_t	common;		/*!< type: QUE_NODE_ASSIGNMENT */
+	sym_node_t*	var;		/*!< variable to set */
+	que_node_t*	val;		/*!< value to assign */
+};
+
+/** Column assignment node */
+struct col_assign_node_t{
+	que_common_t	common;		/*!< type: QUE_NODE_COL_ASSIGN */
+	sym_node_t*	col;		/*!< column to set */
+	que_node_t*	val;		/*!< value to assign */
+};
+
+/** Classes of functions */
+/* @{ */
+#define PARS_FUNC_ARITH		1	/*!< +, -, *, / */
+#define	PARS_FUNC_LOGICAL	2	/*!< AND, OR, NOT */
+#define PARS_FUNC_CMP		3	/*!< comparison operators */
+#define	PARS_FUNC_PREDEFINED	4	/*!< TO_NUMBER, SUBSTR, ... */
+#define	PARS_FUNC_AGGREGATE	5	/*!< COUNT, DISTINCT, SUM */
+#define	PARS_FUNC_OTHER		6	/*!< these are not real functions,
+					e.g., := */
+/* @} */
+
+#ifndef UNIV_NONINL
+#include "pars0pars.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/pars0pars.ic b/storage/innobase/include/pars0pars.ic
new file mode 100644
index 00000000000..4c88337a265
--- /dev/null
+++ b/storage/innobase/include/pars0pars.ic
@@ -0,0 +1,24 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/pars0pars.ic
+SQL parser
+
+Created 11/19/1996 Heikki Tuuri
+*******************************************************/
diff --git a/storage/innobase/include/pars0sym.h b/storage/innobase/include/pars0sym.h
new file mode 100644
index 00000000000..bcf73639228
--- /dev/null
+++ b/storage/innobase/include/pars0sym.h
@@ -0,0 +1,258 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/pars0sym.h
+SQL parser symbol table
+
+Created 12/15/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef pars0sym_h
+#define pars0sym_h
+
+#include "univ.i"
+#include "que0types.h"
+#include "usr0types.h"
+#include "dict0types.h"
+#include "pars0types.h"
+#include "row0types.h"
+
+/******************************************************************//**
+Creates a symbol table for a single stored procedure or query.
+@return	own: symbol table */
+UNIV_INTERN
+sym_tab_t*
+sym_tab_create(
+/*===========*/
+	mem_heap_t*	heap);	/*!< in: memory heap where to create */
+/******************************************************************//**
+Frees the memory allocated dynamically AFTER parsing phase for variables
+etc. in the symbol table. Does not free the mem heap where the table was
+originally created. Frees also SQL explicit cursor definitions. */
+UNIV_INTERN
+void
+sym_tab_free_private(
+/*=================*/
+	sym_tab_t*	sym_tab);	/*!< in, own: symbol table */
+/******************************************************************//**
+Adds an integer literal to a symbol table.
+@return	symbol table node */
+UNIV_INTERN
+sym_node_t*
+sym_tab_add_int_lit(
+/*================*/
+	sym_tab_t*	sym_tab,	/*!< in: symbol table */
+	ulint		val);		/*!< in: integer value */
+/******************************************************************//**
+Adds a string literal to a symbol table.
+@return	symbol table node */
+UNIV_INTERN
+sym_node_t*
+sym_tab_add_str_lit(
+/*================*/
+	sym_tab_t*	sym_tab,	/*!< in: symbol table */
+	const byte*	str,		/*!< in: string with no quotes around
+					it */
+	ulint		len);		/*!< in: string length */
+/******************************************************************//**
+Add a bound literal to a symbol table.
+@return	symbol table node */
+UNIV_INTERN
+sym_node_t*
+sym_tab_add_bound_lit(
+/*==================*/
+	sym_tab_t*	sym_tab,	/*!< in: symbol table */
+	const char*	name,		/*!< in: name of bound literal */
+	ulint*		lit_type);	/*!< out: type of literal (PARS_*_LIT) */
+/**********************************************************************
+Rebind literal to a node in the symbol table. */
+
+sym_node_t*
+sym_tab_rebind_lit(
+/*===============*/
+                                        /* out: symbol table node */
+        sym_node_t*     node,           /* in: node that is bound to literal*/
+        const void*     address,        /* in: pointer to data */
+        ulint           length);        /* in: length of data */
+/******************************************************************//**
+Adds an SQL null literal to a symbol table.
+@return	symbol table node */
+UNIV_INTERN
+sym_node_t*
+sym_tab_add_null_lit(
+/*=================*/
+	sym_tab_t*	sym_tab);	/*!< in: symbol table */
+/******************************************************************//**
+Adds an identifier to a symbol table.
+@return	symbol table node */
+UNIV_INTERN
+sym_node_t*
+sym_tab_add_id(
+/*===========*/
+	sym_tab_t*	sym_tab,	/*!< in: symbol table */
+	byte*		name,		/*!< in: identifier name */
+	ulint		len);		/*!< in: identifier length */
+
+/******************************************************************//**
+Add a bound identifier to a symbol table.
+@return	symbol table node */
+UNIV_INTERN
+sym_node_t*
+sym_tab_add_bound_id(
+/*===========*/
+	sym_tab_t*	sym_tab,	/*!< in: symbol table */
+	const char*	name);		/*!< in: name of bound id */
+
+/** Index of sym_node_t::field_nos corresponding to the clustered index */
+#define	SYM_CLUST_FIELD_NO	0
+/** Index of sym_node_t::field_nos corresponding to a secondary index */
+#define	SYM_SEC_FIELD_NO	1
+
+/** Types of a symbol table node */
+enum sym_tab_entry {
+	SYM_UNSET,		/*!< Unset entry. */
+	SYM_VAR = 91,		/*!< declared parameter or local
+				variable of a procedure */
+	SYM_IMPLICIT_VAR,	/*!< storage for an intermediate result
+				of a calculation */
+	SYM_LIT,		/*!< literal */
+	SYM_TABLE_REF_COUNTED,	/*!< database table name, ref counted. Must
+				be closed explicitly. */
+	SYM_TABLE,		/*!< database table name */
+	SYM_COLUMN,		/*!< database column name */
+	SYM_CURSOR,		/*!< named cursor */
+	SYM_PROCEDURE_NAME,	/*!< stored procedure name */
+	SYM_INDEX,		/*!< database index name */
+	SYM_FUNCTION		/*!< user function name */
+};
+
+/** Symbol table node */
+struct sym_node_t{
+	que_common_t			common;		/*!< node type:
+							QUE_NODE_SYMBOL */
+	/* NOTE: if the data field in 'common.val' is not NULL and the symbol
+	table node is not for a temporary column, the memory for the value has
+	been allocated from dynamic memory and it should be freed when the
+	symbol table is discarded */
+
+	/* 'alias' and 'indirection' are almost the same, but not quite.
+	'alias' always points to the primary instance of the variable, while
+	'indirection' does the same only if we should use the primary
+	instance's values for the node's data. This is usually the case, but
+	when initializing a cursor (e.g., "DECLARE CURSOR c IS SELECT * FROM
+	t WHERE id = x;"), we copy the values from the primary instance to
+	the cursor's instance so that they are fixed for the duration of the
+	cursor, and set 'indirection' to NULL. If we did not, the value of
+	'x' could change between fetches and things would break horribly.
+
+	TODO: It would be cleaner to make 'indirection' a boolean field and
+	always use 'alias' to refer to the primary node. */
+
+	sym_node_t*			indirection;	/*!< pointer to
+							another symbol table
+							node which contains
+							the value for this
+							node, NULL otherwise */
+	sym_node_t*			alias;		/*!< pointer to
+							another symbol table
+							node for which this
+							node is an alias,
+							NULL otherwise */
+	UT_LIST_NODE_T(sym_node_t)	col_var_list;	/*!< list of table
+							columns or a list of
+							input variables for an
+							explicit cursor */
+	ibool				copy_val;	/*!< TRUE if a column
+							and its value should
+							be copied to dynamic
+							memory when fetched */
+	ulint				field_nos[2];	/*!< if a column, in
+							the position
+							SYM_CLUST_FIELD_NO is
+							the field number in the
+							clustered index; in
+							the position
+							SYM_SEC_FIELD_NO
+							the field number in the
+							non-clustered index to
+							use first; if not found
+							from the index, then
+							ULINT_UNDEFINED */
+	ibool				resolved;	/*!< TRUE if the
+							meaning of a variable
+							or a column has been
+							resolved; for literals
+							this is always TRUE */
+	enum sym_tab_entry		token_type;	/*!< type of the
+							parsed token */
+	const char*			name;		/*!< name of an id */
+	ulint				name_len;	/*!< id name length */
+	dict_table_t*			table;		/*!< table definition
+							if a table id or a
+							column id */
+	ulint				col_no;		/*!< column number if a
+							column */
+	sel_buf_t*			prefetch_buf;	/*!< NULL, or a buffer
+							for cached column
+							values for prefetched
+							rows */
+	sel_node_t*			cursor_def;	/*!< cursor definition
+							select node if a
+							named cursor */
+	ulint				param_type;	/*!< PARS_INPUT,
+							PARS_OUTPUT, or
+							PARS_NOT_PARAM if not a
+							procedure parameter */
+	sym_tab_t*			sym_table;	/*!< back pointer to
+							the symbol table */
+	UT_LIST_NODE_T(sym_node_t)	sym_list;	/*!< list of symbol
+							nodes */
+	sym_node_t*			like_node;	/* LIKE operator node*/
+};
+
+/** Symbol table */
+struct sym_tab_t{
+	que_t*			query_graph;
+					/*!< query graph generated by the
+					parser */
+	const char*		sql_string;
+					/*!< SQL string to parse */
+	size_t			string_len;
+					/*!< SQL string length */
+	int			next_char_pos;
+					/*!< position of the next character in
+					sql_string to give to the lexical
+					analyzer */
+	pars_info_t*		info;	/*!< extra information, or NULL */
+	sym_node_list_t		sym_list;
+					/*!< list of symbol nodes in the symbol
+					table */
+	UT_LIST_BASE_NODE_T(func_node_t)
+				func_node_list;
+					/*!< list of function nodes in the
+					parsed query graph */
+	mem_heap_t*		heap;	/*!< memory heap from which we can
+					allocate space */
+};
+
+#ifndef UNIV_NONINL
+#include "pars0sym.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/pars0sym.ic b/storage/innobase/include/pars0sym.ic
new file mode 100644
index 00000000000..266c1a6310d
--- /dev/null
+++ b/storage/innobase/include/pars0sym.ic
@@ -0,0 +1,24 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/pars0sym.ic
+SQL parser symbol table
+
+Created 12/15/1997 Heikki Tuuri
+*******************************************************/
diff --git a/storage/innobase/include/pars0types.h b/storage/innobase/include/pars0types.h
new file mode 100644
index 00000000000..47f4b432d20
--- /dev/null
+++ b/storage/innobase/include/pars0types.h
@@ -0,0 +1,50 @@
+/*****************************************************************************
+
+Copyright (c) 1998, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/pars0types.h
+SQL parser global types
+
+Created 1/11/1998 Heikki Tuuri
+*******************************************************/
+
+#ifndef pars0types_h
+#define pars0types_h
+
+struct pars_info_t;
+struct pars_user_func_t;
+struct pars_bound_lit_t;
+struct pars_bound_id_t;
+struct sym_node_t;
+struct sym_tab_t;
+struct pars_res_word_t;
+struct func_node_t;
+struct order_node_t;
+struct proc_node_t;
+struct elsif_node_t;
+struct if_node_t;
+struct while_node_t;
+struct for_node_t;
+struct exit_node_t;
+struct return_node_t;
+struct assign_node_t;
+struct col_assign_node_t;
+
+typedef UT_LIST_BASE_NODE_T(sym_node_t)	sym_node_list_t;
+
+#endif
diff --git a/storage/innobase/include/que0que.h b/storage/innobase/include/que0que.h
new file mode 100644
index 00000000000..ba8828623af
--- /dev/null
+++ b/storage/innobase/include/que0que.h
@@ -0,0 +1,530 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/que0que.h
+Query graph
+
+Created 5/27/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef que0que_h
+#define que0que_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "dict0types.h"
+#include "trx0trx.h"
+#include "trx0roll.h"
+#include "srv0srv.h"
+#include "usr0types.h"
+#include "que0types.h"
+#include "row0types.h"
+#include "pars0types.h"
+
+/* If the following flag is set TRUE, the module will print trace info
+of SQL execution in the UNIV_SQL_DEBUG version */
+extern ibool	que_trace_on;
+
+/** Mutex protecting the query threads. */
+extern ib_mutex_t	que_thr_mutex;
+
+/***********************************************************************//**
+Creates a query graph fork node.
+@return	own: fork node */
+UNIV_INTERN
+que_fork_t*
+que_fork_create(
+/*============*/
+	que_t*		graph,		/*!< in: graph, if NULL then this
+					fork node is assumed to be the
+					graph root */
+	que_node_t*	parent,		/*!< in: parent node */
+	ulint		fork_type,	/*!< in: fork type */
+	mem_heap_t*	heap);		/*!< in: memory heap where created */
+/***********************************************************************//**
+Gets the first thr in a fork. */
+UNIV_INLINE
+que_thr_t*
+que_fork_get_first_thr(
+/*===================*/
+	que_fork_t*	fork);	/*!< in: query fork */
+/***********************************************************************//**
+Gets the child node of the first thr in a fork. */
+UNIV_INLINE
+que_node_t*
+que_fork_get_child(
+/*===============*/
+	que_fork_t*	fork);	/*!< in: query fork */
+/***********************************************************************//**
+Sets the parent of a graph node. */
+UNIV_INLINE
+void
+que_node_set_parent(
+/*================*/
+	que_node_t*	node,	/*!< in: graph node */
+	que_node_t*	parent);/*!< in: parent */
+/***********************************************************************//**
+Creates a query graph thread node.
+@return	own: query thread node */
+UNIV_INTERN
+que_thr_t*
+que_thr_create(
+/*===========*/
+	que_fork_t*	parent,	/*!< in: parent node, i.e., a fork node */
+	mem_heap_t*	heap);	/*!< in: memory heap where created */
+/**********************************************************************//**
+Frees a query graph, but not the heap where it was created. Does not free
+explicit cursor declarations, they are freed in que_graph_free. */
+UNIV_INTERN
+void
+que_graph_free_recursive(
+/*=====================*/
+	que_node_t*	node);	/*!< in: query graph node */
+/**********************************************************************//**
+Frees a query graph. */
+UNIV_INTERN
+void
+que_graph_free(
+/*===========*/
+	que_t*	graph);	/*!< in: query graph; we assume that the memory
+			heap where this graph was created is private
+			to this graph: if not, then use
+			que_graph_free_recursive and free the heap
+			afterwards! */
+/**********************************************************************//**
+Stops a query thread if graph or trx is in a state requiring it. The
+conditions are tested in the order (1) graph, (2) trx. The lock_sys_t::mutex
+has to be reserved.
+@return	TRUE if stopped */
+UNIV_INTERN
+ibool
+que_thr_stop(
+/*=========*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/**********************************************************************//**
+Moves a thread from another state to the QUE_THR_RUNNING state. Increments
+the n_active_thrs counters of the query graph and transaction. */
+UNIV_INTERN
+void
+que_thr_move_to_run_state_for_mysql(
+/*================================*/
+	que_thr_t*	thr,	/*!< in: a query thread */
+	trx_t*		trx);	/*!< in: transaction */
+/**********************************************************************//**
+A patch for MySQL used to 'stop' a dummy query thread used in MySQL
+select, when there is no error or lock wait. */
+UNIV_INTERN
+void
+que_thr_stop_for_mysql_no_error(
+/*============================*/
+	que_thr_t*	thr,	/*!< in: query thread */
+	trx_t*		trx);	/*!< in: transaction */
+/**********************************************************************//**
+A patch for MySQL used to 'stop' a dummy query thread used in MySQL. The
+query thread is stopped and made inactive, except in the case where
+it was put to the lock wait state in lock0lock.cc, but the lock has already
+been granted or the transaction chosen as a victim in deadlock resolution. */
+UNIV_INTERN
+void
+que_thr_stop_for_mysql(
+/*===================*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/**********************************************************************//**
+Run a query thread. Handles lock waits. */
+UNIV_INTERN
+void
+que_run_threads(
+/*============*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/**********************************************************************//**
+Moves a suspended query thread to the QUE_THR_RUNNING state and release
+a worker thread to execute it. This function should be used to end
+the wait state of a query thread waiting for a lock or a stored procedure
+completion.
+@return query thread instance of thread to wakeup or NULL  */
+UNIV_INTERN
+que_thr_t*
+que_thr_end_lock_wait(
+/*==================*/
+	trx_t*		trx);		/*!< in: transaction in the
+					QUE_THR_LOCK_WAIT state */
+/**********************************************************************//**
+Starts execution of a command in a query fork. Picks a query thread which
+is not in the QUE_THR_RUNNING state and moves it to that state. If none
+can be chosen, a situation which may arise in parallelized fetches, NULL
+is returned.
+@return a query thread of the graph moved to QUE_THR_RUNNING state, or
+NULL; the query thread should be executed by que_run_threads by the
+caller */
+UNIV_INTERN
+que_thr_t*
+que_fork_start_command(
+/*===================*/
+	que_fork_t*	fork);	/*!< in: a query fork */
+/***********************************************************************//**
+Gets the trx of a query thread. */
+UNIV_INLINE
+trx_t*
+thr_get_trx(
+/*========*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/*******************************************************************//**
+Determines if this thread is rolling back an incomplete transaction
+in crash recovery.
+@return TRUE if thr is rolling back an incomplete transaction in crash
+recovery */
+UNIV_INLINE
+ibool
+thr_is_recv(
+/*========*/
+	const que_thr_t*	thr);	/*!< in: query thread */
+/***********************************************************************//**
+Gets the type of a graph node.
+@return	node type stored in the common part of the node */
+UNIV_INLINE
+ulint
+que_node_get_type(
+/*==============*/
+	que_node_t*	node);	/*!< in: graph node */
+/***********************************************************************//**
+Gets pointer to the value data type field of a graph node.
+@return	pointer to the data type of the value field of the node */
+UNIV_INLINE
+dtype_t*
+que_node_get_data_type(
+/*===================*/
+	que_node_t*	node);	/*!< in: graph node */
+/***********************************************************************//**
+Gets pointer to the value dfield of a graph node.
+@return	pointer to the value dfield stored in the common part of the node */
+UNIV_INLINE
+dfield_t*
+que_node_get_val(
+/*=============*/
+	que_node_t*	node);	/*!< in: graph node */
+/***********************************************************************//**
+Gets the value buffer size of a graph node.
+@return	val buffer size, not defined if val.data == NULL in node */
+UNIV_INLINE
+ulint
+que_node_get_val_buf_size(
+/*======================*/
+	que_node_t*	node);	/*!< in: graph node */
+/***********************************************************************//**
+Sets the value buffer size of a graph node. */
+UNIV_INLINE
+void
+que_node_set_val_buf_size(
+/*======================*/
+	que_node_t*	node,	/*!< in: graph node */
+	ulint		size);	/*!< in: size */
+/*********************************************************************//**
+Gets the next list node in a list of query graph nodes.
+@return	next node in a list of nodes */
+UNIV_INLINE
+que_node_t*
+que_node_get_next(
+/*==============*/
+	que_node_t*	node);	/*!< in: node in a list */
+/*********************************************************************//**
+Gets the parent node of a query graph node.
+@return	parent node or NULL */
+UNIV_INLINE
+que_node_t*
+que_node_get_parent(
+/*================*/
+	que_node_t*	node);	/*!< in: node */
+/****************************************************************//**
+Get the first containing loop node (e.g. while_node_t or for_node_t) for the
+given node, or NULL if the node is not within a loop.
+@return	containing loop node, or NULL. */
+UNIV_INTERN
+que_node_t*
+que_node_get_containing_loop_node(
+/*==============================*/
+	que_node_t*	node);	/*!< in: node */
+/*********************************************************************//**
+Catenates a query graph node to a list of them, possibly an empty list.
+@return	one-way list of nodes */
+UNIV_INLINE
+que_node_t*
+que_node_list_add_last(
+/*===================*/
+	que_node_t*	node_list,	/*!< in: node list, or NULL */
+	que_node_t*	node);		/*!< in: node */
+/*************************************************************************
+Gets the last node of the list. The inline definition asserts (ut_a) that
+node_list is not NULL, so an empty list must not be passed. */
+UNIV_INLINE
+que_node_t*
+que_node_list_get_last(
+/*===================*/
+					/* out: last node of the list */
+	que_node_t*	node_list);	/* in: node list, must not be NULL */
+/*********************************************************************//**
+Gets a query graph node list length.
+@return	length, for NULL list 0 */
+UNIV_INLINE
+ulint
+que_node_list_get_len(
+/*==================*/
+	que_node_t*	node_list);	/*!< in: node list, or NULL */
+/**********************************************************************//**
+Checks if graph, trx, or session is in a state where the query thread should
+be stopped.
+@return TRUE if should be stopped; NOTE that if the peek is made
+without reserving the trx_t::mutex, then another peek with the mutex
+reserved is necessary before deciding the actual stopping */
+UNIV_INLINE
+ibool
+que_thr_peek_stop(
+/*==============*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/***********************************************************************//**
+Returns TRUE if the query graph is for a SELECT statement, i.e. its fork
+type is QUE_FORK_SELECT_SCROLL or QUE_FORK_SELECT_NON_SCROLL.
+@return	TRUE if a select */
+UNIV_INLINE
+ibool
+que_graph_is_select(
+/*================*/
+	que_t*		graph);		/*!< in: graph */
+/**********************************************************************//**
+Prints info of an SQL query graph node. */
+UNIV_INTERN
+void
+que_node_print_info(
+/*================*/
+	que_node_t*	node);	/*!< in: query graph node */
+/*********************************************************************//**
+Evaluate the given SQL
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+que_eval_sql(
+/*=========*/
+	pars_info_t*	info,	/*!< in: info struct, or NULL */
+	const char*	sql,	/*!< in: SQL string */
+	ibool		reserve_dict_mutex,
+				/*!< in: if TRUE, acquire/release
+				dict_sys->mutex around call to pars_sql. */
+	trx_t*		trx);	/*!< in: trx */
+
+/**********************************************************************//**
+Round robin scheduler.
+@return a query thread of the graph moved to QUE_THR_RUNNING state, or
+NULL; the query thread should be executed by que_run_threads by the
+caller */
+UNIV_INTERN
+que_thr_t*
+que_fork_scheduler_round_robin(
+/*===========================*/
+	que_fork_t*	fork,		/*!< in: a query fork */
+	que_thr_t*	thr);		/*!< in: current pos */
+
+/*********************************************************************//**
+Initialise the query sub-system. */
+UNIV_INTERN
+void
+que_init(void);
+/*==========*/
+
+/*********************************************************************//**
+Close the query sub-system. */
+UNIV_INTERN
+void
+que_close(void);
+/*===========*/
+
+/* Query graph query thread node: the fields are protected by the
+trx_t::mutex with the exceptions named below */
+
+struct que_thr_t{
+	que_common_t	common;		/*!< type: QUE_NODE_THR */
+	ulint		magic_n;	/*!< magic number to catch memory
+					corruption; see QUE_THR_MAGIC_N and
+					QUE_THR_MAGIC_FREED below */
+	que_node_t*	child;		/*!< graph child node */
+	que_t*		graph;		/*!< graph where this node belongs */
+	ulint		state;		/*!< state of the query thread; see
+					the QUE_THR_... query thread states */
+	ibool		is_active;	/*!< TRUE if the thread has been set
+					to the run state in
+					que_thr_move_to_run_state, but not
+					deactivated in
+					que_thr_dec_reference_count;
+					NOTE(review): the comment of
+					que_fork_t::n_active_thrs spells this
+					function que_thr_dec_refer_count,
+					confirm which name is current */
+	/*------------------------------*/
+	/* The following fields are private to the OS thread executing the
+	query thread, and are not protected by any mutex: */
+
+	que_node_t*	run_node;	/*!< pointer to the node where the
+					subgraph down from this node is
+					currently executed */
+	que_node_t*	prev_node;	/*!< pointer to the node from which
+					the control came */
+	ulint		resource;	/*!< resource usage of the query thread
+					thus far */
+	ulint		lock_state;	/*!< lock state of thread (table or
+					row); see the QUE_THR_LOCK_...
+					query thread lock states */
+	struct srv_slot_t*
+			slot;		/*!< the thread slot in the wait
+					array in srv_sys_t */
+	/*------------------------------*/
+	/* The following fields are links for the various lists that
+	this type can be on. */
+	UT_LIST_NODE_T(que_thr_t)
+			thrs;		/*!< list of thread nodes of the fork
+					node */
+	UT_LIST_NODE_T(que_thr_t)
+			trx_thrs;	/*!< lists of threads in wait list of
+					the trx */
+	UT_LIST_NODE_T(que_thr_t)
+			queue;		/*!< list of runnable thread nodes in
+					the server task queue */
+	ulint		fk_cascade_depth; /*!< maximum cascading call depth
+					supported for foreign key constraint
+					related delete/updates */
+};
+
+/* Magic numbers for que_thr_t::magic_n; presumably the first marks a live
+thread node and the second a freed one (they are assigned outside this
+header, confirm against que0que.cc) */
+#define QUE_THR_MAGIC_N		8476583
+#define QUE_THR_MAGIC_FREED	123461526
+
+/* Query graph fork node: its fields are protected by the query thread mutex.
+The root of a query graph is a fork node (que_t is a typedef of this
+struct). */
+struct que_fork_t{
+	que_common_t	common;		/*!< type: QUE_NODE_FORK */
+	que_t*		graph;		/*!< query graph of this node */
+	ulint		fork_type;	/*!< fork type; see the QUE_FORK_...
+					query fork (or graph) types */
+	ulint		n_active_thrs;	/*!< if this is the root of a graph, the
+					number of query threads that have been
+					started in que_thr_move_to_run_state
+					but for which que_thr_dec_refer_count
+					has not yet been called */
+	trx_t*		trx;		/*!< transaction: this is set only in
+					the root node */
+	ulint		state;		/*!< state of the fork node; see the
+					QUE_FORK_... query fork (or graph)
+					states */
+	que_thr_t*	caller;		/*!< pointer to a possible calling query
+					thread */
+	UT_LIST_BASE_NODE_T(que_thr_t)
+			thrs;		/*!< list of query threads */
+	/*------------------------------*/
+	/* The fields in this section are defined only in the root node */
+	sym_tab_t*	sym_tab;	/*!< symbol table of the query,
+					generated by the parser, or NULL
+					if the graph was created 'by hand' */
+	pars_info_t*	info;		/*!< info struct, or NULL */
+	/* The following cur_... fields are relevant only in a select graph */
+
+	ulint		cur_end;	/*!< QUE_CUR_NOT_DEFINED, QUE_CUR_START,
+					QUE_CUR_END */
+	ulint		cur_pos;	/*!< if there are n rows in the result
+					set, values 0 and n + 1 mean before
+					first row, or after last row, depending
+					on cur_end; values 1...n mean a row
+					index */
+	ibool		cur_on_row;	/*!< TRUE if cursor is on a row, i.e.,
+					it is not before the first row or
+					after the last row */
+	sel_node_t*	last_sel_node;	/*!< last executed select node, or NULL
+					if none */
+	UT_LIST_NODE_T(que_fork_t)
+			graphs;		/*!< list of query graphs of a session
+					or a stored procedure */
+	/*------------------------------*/
+	mem_heap_t*	heap;		/*!< memory heap where the fork was
+					created */
+
+};
+
+/* Query fork (or graph) types; stored in que_fork_t::fork_type */
+#define QUE_FORK_SELECT_NON_SCROLL	1	/* forward-only cursor */
+#define QUE_FORK_SELECT_SCROLL		2	/* scrollable cursor */
+#define QUE_FORK_INSERT			3
+#define QUE_FORK_UPDATE			4
+#define QUE_FORK_ROLLBACK		5
+			/* This is really the undo graph used in rollback,
+			no signal-sending roll_node in this graph */
+#define QUE_FORK_PURGE			6
+#define	QUE_FORK_EXECUTE		7
+#define QUE_FORK_PROCEDURE		8
+#define QUE_FORK_PROCEDURE_CALL		9
+#define QUE_FORK_MYSQL_INTERFACE	10
+#define	QUE_FORK_RECOVERY		11
+
+/* Query fork (or graph) states; stored in que_fork_t::state */
+#define QUE_FORK_ACTIVE		1
+#define QUE_FORK_COMMAND_WAIT	2
+#define QUE_FORK_INVALID	3
+#define QUE_FORK_BEING_FREED	4
+
+/* Flag which is ORed to control structure statement node types. All base
+node type numbers below are smaller than this flag, so adding it with '+'
+in the definitions gives the same value as ORing it in. */
+#define QUE_NODE_CONTROL_STAT	1024
+
+/* Query graph node types; note that the value 3 is not assigned */
+#define	QUE_NODE_LOCK		1
+#define	QUE_NODE_INSERT		2
+#define QUE_NODE_UPDATE		4
+#define	QUE_NODE_CURSOR		5
+#define	QUE_NODE_SELECT		6
+#define	QUE_NODE_AGGREGATE	7
+#define QUE_NODE_FORK		8
+#define QUE_NODE_THR		9
+#define QUE_NODE_UNDO		10
+#define QUE_NODE_COMMIT		11
+#define QUE_NODE_ROLLBACK	12
+#define QUE_NODE_PURGE		13
+#define QUE_NODE_CREATE_TABLE	14
+#define QUE_NODE_CREATE_INDEX	15
+#define QUE_NODE_SYMBOL		16
+#define QUE_NODE_RES_WORD	17
+#define QUE_NODE_FUNC		18
+#define QUE_NODE_ORDER		19
+/* PROC, IF, WHILE and FOR carry the QUE_NODE_CONTROL_STAT flag */
+#define QUE_NODE_PROC		(20 + QUE_NODE_CONTROL_STAT)
+#define QUE_NODE_IF		(21 + QUE_NODE_CONTROL_STAT)
+#define QUE_NODE_WHILE		(22 + QUE_NODE_CONTROL_STAT)
+#define QUE_NODE_ASSIGNMENT	23
+#define QUE_NODE_FETCH		24
+#define QUE_NODE_OPEN		25
+#define QUE_NODE_COL_ASSIGNMENT	26
+#define QUE_NODE_FOR		(27 + QUE_NODE_CONTROL_STAT)
+#define QUE_NODE_RETURN		28
+#define QUE_NODE_ROW_PRINTF	29
+#define QUE_NODE_ELSIF		30
+#define QUE_NODE_CALL		31
+#define QUE_NODE_EXIT		32
+
+/* Query thread states; note that the value 6 is not assigned */
+#define QUE_THR_RUNNING		1
+#define QUE_THR_PROCEDURE_WAIT	2
+#define	QUE_THR_COMPLETED	3	/* in selects this means that the
+					thread is at the end of its result set
+					(or start, in case of a scroll cursor);
+					in other statements, this means the
+					thread has done its task */
+#define QUE_THR_COMMAND_WAIT	4
+#define QUE_THR_LOCK_WAIT	5
+#define QUE_THR_SUSPENDED	7
+#define QUE_THR_ERROR		8
+
+/* Query thread lock states */
+#define QUE_THR_LOCK_NOLOCK	0
+#define QUE_THR_LOCK_ROW	1
+#define QUE_THR_LOCK_TABLE	2
+
+/* From where the cursor position is counted; values of
+que_fork_t::cur_end */
+#define QUE_CUR_NOT_DEFINED	1
+#define QUE_CUR_START		2
+#define	QUE_CUR_END		3
+
+#ifndef UNIV_NONINL
+#include "que0que.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/que0que.ic b/storage/innobase/include/que0que.ic
new file mode 100644
index 00000000000..eff5a86d958
--- /dev/null
+++ b/storage/innobase/include/que0que.ic
@@ -0,0 +1,309 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2010, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/que0que.ic
+Query graph
+
+Created 5/27/1996 Heikki Tuuri
+*******************************************************/
+
+#include "usr0sess.h"
+
+/***********************************************************************//**
+Gets the trx of a query thread.
+@return	transaction of the graph the query thread belongs to */
+UNIV_INLINE
+trx_t*
+thr_get_trx(
+/*========*/
+	que_thr_t*	thr)	/*!< in: query thread */
+{
+	que_t*	graph;
+
+	ut_ad(thr);
+
+	/* The transaction is stored in the graph, not in the thread. */
+	graph = thr->graph;
+
+	return(graph->trx);
+}
+
+/*******************************************************************//**
+Tells whether a query thread belongs to a transaction that is being
+rolled back as an incomplete transaction in crash recovery.
+@return TRUE if thr is rolling back an incomplete transaction in crash
+recovery */
+UNIV_INLINE
+ibool
+thr_is_recv(
+/*========*/
+	const que_thr_t*	thr)	/*!< in: query thread */
+{
+	trx_t*	trx = thr->graph->trx;
+
+	/* The decision is delegated to the transaction of the graph. */
+	return(trx_is_recv(trx));
+}
+
+/***********************************************************************//**
+Gets the first thr in a fork.
+@return	first query thread in the thrs list of the fork */
+UNIV_INLINE
+que_thr_t*
+que_fork_get_first_thr(
+/*===================*/
+	que_fork_t*	fork)	/*!< in: query fork */
+{
+	que_thr_t*	first_thr;
+
+	first_thr = UT_LIST_GET_FIRST(fork->thrs);
+
+	return(first_thr);
+}
+
+/***********************************************************************//**
+Gets the child node of the first thr in a fork. The first thr is
+dereferenced without a NULL check.
+@return	child node of the first query thread of the fork */
+UNIV_INLINE
+que_node_t*
+que_fork_get_child(
+/*===============*/
+	que_fork_t*	fork)	/*!< in: query fork */
+{
+	que_thr_t*	first_thr = UT_LIST_GET_FIRST(fork->thrs);
+	que_node_t*	child = first_thr->child;
+
+	return(child);
+}
+
+/***********************************************************************//**
+Gets the type of a graph node.
+@return	node type stored in the common part of the node */
+UNIV_INLINE
+ulint
+que_node_get_type(
+/*==============*/
+	que_node_t*	node)	/*!< in: graph node */
+{
+	que_common_t*	common;
+
+	ut_ad(node);
+
+	/* Every graph node starts with a que_common_t. */
+	common = (que_common_t*) node;
+
+	return(common->type);
+}
+
+/***********************************************************************//**
+Gets pointer to the value dfield of a graph node.
+@return	pointer to the val field in the common part of the node */
+UNIV_INLINE
+dfield_t*
+que_node_get_val(
+/*=============*/
+	que_node_t*	node)	/*!< in: graph node */
+{
+	que_common_t*	common;
+
+	ut_ad(node);
+
+	common = (que_common_t*) node;
+
+	return(&common->val);
+}
+
+/***********************************************************************//**
+Gets the value buffer size of a graph node.
+@return	val buffer size, not defined if val.data == NULL in node */
+UNIV_INLINE
+ulint
+que_node_get_val_buf_size(
+/*======================*/
+	que_node_t*	node)	/*!< in: graph node */
+{
+	que_common_t*	common;
+
+	ut_ad(node);
+
+	common = (que_common_t*) node;
+
+	return(common->val_buf_size);
+}
+
+/***********************************************************************//**
+Sets the value buffer size of a graph node. Only the recorded size is
+changed; no buffer is allocated or freed here. */
+UNIV_INLINE
+void
+que_node_set_val_buf_size(
+/*======================*/
+	que_node_t*	node,	/*!< in: graph node */
+	ulint		size)	/*!< in: size */
+{
+	que_common_t*	common;
+
+	ut_ad(node);
+
+	common = (que_common_t*) node;
+	common->val_buf_size = size;
+}
+
+/***********************************************************************//**
+Sets the parent of a graph node by storing the back pointer in the common
+part of the node. */
+UNIV_INLINE
+void
+que_node_set_parent(
+/*================*/
+	que_node_t*	node,	/*!< in: graph node */
+	que_node_t*	parent)	/*!< in: parent */
+{
+	que_common_t*	common;
+
+	ut_ad(node);
+
+	common = (que_common_t*) node;
+	common->parent = parent;
+}
+
+/***********************************************************************//**
+Gets pointer to the value data type field of a graph node.
+@return	data type of the val field, as given by dfield_get_type() */
+UNIV_INLINE
+dtype_t*
+que_node_get_data_type(
+/*===================*/
+	que_node_t*	node)	/*!< in: graph node */
+{
+	dfield_t*	val;
+
+	ut_ad(node);
+
+	val = &((que_common_t*) node)->val;
+
+	return(dfield_get_type(val));
+}
+
+/*********************************************************************//**
+Appends a query graph node to the end of a one-way list of nodes linked
+through the brother pointers. The list may be empty (NULL), in which case
+the appended node becomes the list.
+@return	one-way list of nodes */
+UNIV_INLINE
+que_node_t*
+que_node_list_add_last(
+/*===================*/
+	que_node_t*	node_list,	/*!< in: node list, or NULL */
+	que_node_t*	node)		/*!< in: node */
+{
+	que_common_t*	new_node = (que_common_t*) node;
+	que_common_t*	tail;
+
+	/* The appended node always terminates the list. */
+	new_node->brother = NULL;
+
+	if (node_list == NULL) {
+
+		return(node);
+	}
+
+	/* Walk to the current last node of the list. */
+	for (tail = (que_common_t*) node_list;
+	     tail->brother != NULL;
+	     tail = (que_common_t*) tail->brother) {
+	}
+
+	tail->brother = node;
+
+	return(node_list);
+}
+
+/*************************************************************************
+Gets the last node of a list of query graph nodes. The list is only read,
+nothing is removed from it. The list must not be empty: a NULL node_list
+fails the ut_a assertion. */
+UNIV_INLINE
+que_node_t*
+que_node_list_get_last(
+/*===================*/
+					/* out: last node in list.*/
+	que_node_t*	node_list)	/* in: node list, must not be NULL */
+{
+	que_common_t*	last;
+
+	ut_a(node_list != NULL);
+
+	/* Follow the brother pointers until the node that has none. */
+	for (last = (que_common_t*) node_list;
+	     last->brother != NULL;
+	     last = (que_common_t*) last->brother) {
+	}
+
+	return(last);
+}
+/*********************************************************************//**
+Gets the next list node in a list of query graph nodes, i.e. the brother
+of the given node.
+@return	next node in a list of nodes */
+UNIV_INLINE
+que_node_t*
+que_node_get_next(
+/*==============*/
+	que_node_t*	node)	/*!< in: node in a list */
+{
+	que_common_t*	common = (que_common_t*) node;
+
+	return(common->brother);
+}
+
+/*********************************************************************//**
+Counts the nodes of a query graph node list by following the brother
+pointers.
+@return	length, for NULL list 0 */
+UNIV_INLINE
+ulint
+que_node_list_get_len(
+/*==================*/
+	que_node_t*	node_list)	/*!< in: node list, or NULL */
+{
+	ulint			n_nodes = 0;
+	const que_common_t*	cur;
+
+	for (cur = (const que_common_t*) node_list;
+	     cur != NULL;
+	     cur = (const que_common_t*) cur->brother) {
+
+		n_nodes++;
+	}
+
+	return(n_nodes);
+}
+
+/*********************************************************************//**
+Gets the parent node of a query graph node from the back pointer stored in
+the common part of the node.
+@return	parent node or NULL */
+UNIV_INLINE
+que_node_t*
+que_node_get_parent(
+/*================*/
+	que_node_t*	node)	/*!< in: node */
+{
+	que_common_t*	common = (que_common_t*) node;
+
+	return(common->parent);
+}
+
+/**********************************************************************//**
+Checks if graph, trx, or session is in a state where the query thread should
+be stopped. The thread should stop when the graph is not in the
+QUE_FORK_ACTIVE state, or when the trx is in a lock wait, or when the trx
+is neither rolling back nor running.
+@return TRUE if should be stopped; NOTE that if the peek is made
+without reserving the trx mutex, then another peek with the mutex
+reserved is necessary before deciding the actual stopping */
+UNIV_INLINE
+ibool
+que_thr_peek_stop(
+/*==============*/
+	que_thr_t*	thr)	/*!< in: query thread */
+{
+	que_t*	graph	= thr->graph;
+	trx_t*	trx	= graph->trx;
+
+	/* The checks are made in the same order as the conditions were
+	originally ORed together. */
+
+	if (graph->state != QUE_FORK_ACTIVE) {
+
+		return(TRUE);
+	}
+
+	if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
+
+		return(TRUE);
+	}
+
+	if (trx->lock.que_state != TRX_QUE_ROLLING_BACK
+	    && trx->lock.que_state != TRX_QUE_RUNNING) {
+
+		return(TRUE);
+	}
+
+	return(FALSE);
+}
+
+/***********************************************************************//**
+Returns TRUE if the query graph is for a SELECT statement, that is, if its
+fork type is one of the two select cursor types.
+@return	TRUE if a select */
+UNIV_INLINE
+ibool
+que_graph_is_select(
+/*================*/
+	que_t*		graph)		/*!< in: graph */
+{
+	switch (graph->fork_type) {
+	case QUE_FORK_SELECT_SCROLL:
+	case QUE_FORK_SELECT_NON_SCROLL:
+		return(TRUE);
+	default:
+		break;
+	}
+
+	return(FALSE);
+}
diff --git a/storage/innobase/include/que0types.h b/storage/innobase/include/que0types.h
new file mode 100644
index 00000000000..0f11cad301a
--- /dev/null
+++ b/storage/innobase/include/que0types.h
@@ -0,0 +1,57 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/que0types.h
+Query graph global types
+
+Created 5/27/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef que0types_h
+#define que0types_h
+
+#include "data0data.h"
+#include "dict0types.h"
+
+/* Pseudotype for all graph nodes; a que_node_t pointer is cast to
+que_common_t* to reach the fields shared by all node types */
+typedef void	que_node_t;
+
+/* Query graph root is a fork node */
+typedef	struct que_fork_t	que_t;
+
+struct que_thr_t;
+
+/* Common struct at the beginning of each query graph node; the name of this
+substruct must be 'common' */
+
+struct que_common_t{
+	ulint		type;	/*!< query node type */
+	que_node_t*	parent;	/*!< back pointer to parent node, or NULL */
+	que_node_t*	brother;/*!< pointer to a possible brother node: the
+				next node in a one-way list of nodes, or
+				NULL at the end of the list */
+	dfield_t	val;	/*!< evaluated value for an expression */
+	ulint		val_buf_size;
+				/*!< buffer size for the evaluated value data,
+				if the buffer has been allocated dynamically:
+				if this field is != 0, and the node is a
+				symbol node or a function node, then we
+				have to free the data field in val
+				explicitly */
+};
+
+#endif
diff --git a/storage/innobase/include/read0read.h b/storage/innobase/include/read0read.h
new file mode 100644
index 00000000000..980faddf98e
--- /dev/null
+++ b/storage/innobase/include/read0read.h
@@ -0,0 +1,193 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/read0read.h
+Cursor read
+
+Created 2/16/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef read0read_h
+#define read0read_h
+
+#include "univ.i"
+
+
+#include "ut0byte.h"
+#include "ut0lst.h"
+#include "trx0trx.h"
+#include "read0types.h"
+
+/*********************************************************************//**
+Opens a read view where exactly the transactions serialized before this
+point in time are seen in the view.
+@return	own: read view struct */
+UNIV_INTERN
+read_view_t*
+read_view_open_now(
+/*===============*/
+	trx_id_t	cr_trx_id,	/*!< in: trx_id of creating
+					transaction, or 0 used in purge */
+	mem_heap_t*	heap);		/*!< in: memory heap from which
+					allocated */
+/*********************************************************************//**
+Makes a copy of the oldest existing read view, or opens a new. The view
+must be closed with ..._close.
+@return	own: read view struct */
+UNIV_INTERN
+read_view_t*
+read_view_purge_open(
+/*=================*/
+	mem_heap_t*	heap);		/*!< in: memory heap from which
+					allocated */
+/*********************************************************************//**
+Remove a read view from the trx_sys->view_list. Does nothing if view is 0.
+If own_mutex is false, the function acquires trx_sys->mutex for the
+duration of the removal and releases it afterwards. */
+UNIV_INLINE
+void
+read_view_remove(
+/*=============*/
+	read_view_t*	view,		/*!< in: read view, can be 0 */
+	bool		own_mutex);	/*!< in: true if caller owns the
+					trx_sys_t::mutex */
+/*********************************************************************//**
+Closes a consistent read view for MySQL. This function is called at an SQL
+statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */
+UNIV_INTERN
+void
+read_view_close_for_mysql(
+/*======================*/
+	trx_t*	trx);	/*!< in: trx which has a read view */
+/*********************************************************************//**
+Checks if a read view sees the specified transaction. A trx id below
+view->up_limit_id is always seen, a trx id at or above view->low_limit_id
+is never seen, and a trx id in between is seen unless it is listed in
+view->trx_ids.
+@return	true if sees */
+UNIV_INLINE
+bool
+read_view_sees_trx_id(
+/*==================*/
+	const read_view_t*	view,	/*!< in: read view */
+	trx_id_t		trx_id)	/*!< in: trx id */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Prints a read view to stderr. */
+UNIV_INTERN
+void
+read_view_print(
+/*============*/
+	const read_view_t*	view);	/*!< in: read view */
+/*********************************************************************//**
+Create a consistent cursor view for mysql to be used in cursors. In this
+consistent read view modifications done by the creating transaction or future
+transactions are not visible. */
+UNIV_INTERN
+cursor_view_t*
+read_cursor_view_create_for_mysql(
+/*==============================*/
+	trx_t*		cr_trx);/*!< in: trx where cursor view is created */
+/*********************************************************************//**
+Close a given consistent cursor view for mysql and restore global read view
+back to a transaction read view. */
+UNIV_INTERN
+void
+read_cursor_view_close_for_mysql(
+/*=============================*/
+	trx_t*		trx,		/*!< in: trx */
+	cursor_view_t*	curview);	/*!< in: cursor view to be closed */
+/*********************************************************************//**
+This function sets a given consistent cursor view to a transaction
+read view if given consistent cursor view is not NULL. Otherwise, function
+restores a global read view to a transaction read view. */
+UNIV_INTERN
+void
+read_cursor_set_for_mysql(
+/*======================*/
+	trx_t*		trx,	/*!< in: transaction where cursor is set */
+	cursor_view_t*	curview);/*!< in: consistent cursor view to be set */
+
+/** Read view lists the trx ids of those transactions for which a consistent
+read should not see the modifications to the database. */
+
+struct read_view_t{
+	ulint		type;	/*!< VIEW_NORMAL, VIEW_HIGH_GRANULARITY */
+	undo_no_t	undo_no;/*!< 0 or if type is
+				VIEW_HIGH_GRANULARITY
+				transaction undo_no when this high-granularity
+				consistent read view was created */
+	trx_id_t	low_limit_no;
+				/*!< The view does not need to see the undo
+				logs for transactions whose transaction number
+				is strictly smaller (<) than this value: they
+				can be removed in purge if not needed by other
+				views */
+	trx_id_t	low_limit_id;
+				/*!< The read should not see any transaction
+				with trx id >= this value. In other words,
+				this is the "high water mark". */
+	trx_id_t	up_limit_id;
+				/*!< The read should see all trx ids which
+				are strictly smaller (<) than this value.
+				In other words,
+				this is the "low water mark". */
+	ulint		n_trx_ids;
+				/*!< Number of cells in the trx_ids array */
+	trx_id_t*	trx_ids;/*!< Additional trx ids which the read should
+				not see: typically, these are the read-write
+				active transactions at the time when the read
+				is serialized, except the reading transaction
+				itself; the trx ids in this array are in a
+				descending order. These trx_ids should be
+				between the "low" and "high" water marks,
+				that is, up_limit_id and low_limit_id.
+				The descending order is relied upon by the
+				binary search in read_view_sees_trx_id() and
+				is checked in debug builds by
+				read_view_validate(). */
+	trx_id_t	creator_trx_id;
+				/*!< trx id of creating transaction, or
+				0 used in purge */
+	UT_LIST_NODE_T(read_view_t) view_list;
+				/*!< List of read views in trx_sys; in debug
+				builds read_view_list_validate() asserts that
+				low_limit_no does not increase along the
+				list */
+};
+
+/** Read view types; the values of read_view_t::type @{ */
+#define VIEW_NORMAL		1	/*!< Normal consistent read view
+					where transaction does not see changes
+					made by active transactions except
+					creating transaction. */
+#define VIEW_HIGH_GRANULARITY	2	/*!< High-granularity read view where
+					transaction does not see changes
+					made by active transactions and own
+					changes after a point in time when this
+					read view was created. */
+/* @} */
+
+/** Implement InnoDB framework to support consistent read views in
+cursors. This struct holds both heap where consistent read view
+is allocated and pointer to a read view. */
+
+struct cursor_view_t{
+	mem_heap_t*	heap;
+				/*!< Memory heap for the cursor view; per the
+				struct comment, the read view is allocated
+				from this heap */
+	read_view_t*	read_view;
+				/*!< Consistent read view of the cursor */
+	ulint		n_mysql_tables_in_use;
+				/*!< number of Innobase tables used in the
+				processing of this cursor */
+};
+
+#ifndef UNIV_NONINL
+#include "read0read.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/read0read.ic b/storage/innobase/include/read0read.ic
new file mode 100644
index 00000000000..82c1028f12e
--- /dev/null
+++ b/storage/innobase/include/read0read.ic
@@ -0,0 +1,148 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/read0read.ic
+Cursor read
+
+Created 2/16/1997 Heikki Tuuri
+*******************************************************/
+
+#include "trx0sys.h"
+
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Validates a read view object: asserts that view->trx_ids is in strictly
+descending order. The caller must own trx_sys->mutex.
+@return	true, so that the call can be wrapped in ut_ad() */
+static
+bool
+read_view_validate(
+/*===============*/
+	const read_view_t*	view)	/*!< in: view to validate */
+{
+	ut_ad(mutex_own(&trx_sys->mutex));
+
+	/* Compare every element with its predecessor. */
+	ulint	i = 1;
+
+	while (i < view->n_trx_ids) {
+
+		ut_a(view->trx_ids[i] < view->trx_ids[i - 1]);
+
+		++i;
+	}
+
+	return(true);
+}
+
+/** Functor to validate the view list: called once per view in list order,
+it asserts that low_limit_no does not increase from one view to the next. */
+struct	ViewCheck {
+
+	/** View passed to the previous call, NULL before the first call. */
+	const read_view_t*	m_prev_view;
+
+	ViewCheck() : m_prev_view(NULL) { }
+
+	/** Checks one view against its predecessor and remembers it.
+	@param[in]	view	next view of the list */
+	void	operator()(const read_view_t* view)
+	{
+		ut_a(m_prev_view == NULL
+		     || m_prev_view->low_limit_no >= view->low_limit_no);
+
+		m_prev_view = view;
+	}
+};
+
+/*********************************************************************//**
+Validates a read view list by applying ViewCheck to every view in
+trx_sys->view_list. The caller must own trx_sys->mutex.
+@return	true, so that the call can be wrapped in ut_ad() */
+static
+bool
+read_view_list_validate(void)
+/*=========================*/
+{
+	ViewCheck	check;
+
+	ut_ad(mutex_own(&trx_sys->mutex));
+
+	ut_list_map(trx_sys->view_list, &read_view_t::view_list, check);
+
+	return(true);
+}
+#endif /* UNIV_DEBUG */
+
+/*********************************************************************//**
+Checks if a read view sees the specified transaction. A trx id below the
+low water mark (up_limit_id) is always seen and a trx id at or above the
+high water mark (low_limit_id) is never seen. A trx id between the water
+marks is seen unless it is listed in view->trx_ids, which is searched with
+a binary search that relies on the array being in descending order.
+@return	true if sees */
+UNIV_INLINE
+bool
+read_view_sees_trx_id(
+/*==================*/
+	const read_view_t*	view,	/*!< in: read view */
+	trx_id_t		trx_id)	/*!< in: trx id */
+{
+	if (trx_id < view->up_limit_id) {
+
+		return(true);
+	}
+
+	if (trx_id >= view->low_limit_id) {
+
+		return(false);
+	}
+
+	/* Check that the array is non-empty before any index is derived
+	from its length. */
+	ut_a(view->n_trx_ids > 0);
+
+	/* Search the half-open range [lower, upper). With an exclusive
+	upper bound there is no "mid - 1" step, so the index cannot wrap
+	around below zero. */
+	ulint	lower = 0;
+	ulint	upper = view->n_trx_ids;
+
+	while (lower < upper) {
+		const ulint	mid	= lower + ((upper - lower) >> 1);
+		const trx_id_t	mid_id	= view->trx_ids[mid];
+
+		if (mid_id == trx_id) {
+			/* Listed in the view: must not be seen. */
+			return(false);
+		} else if (mid_id < trx_id) {
+			/* Descending order: bigger ids are at smaller
+			indexes. */
+			upper = mid;
+		} else {
+			lower = mid + 1;
+		}
+	}
+
+	return(true);
+}
+
+/*********************************************************************//**
+Remove a read view from the trx_sys->view_list. A 0 view is ignored. The
+trx_sys->mutex is acquired and released here unless the caller states that
+it already owns it. */
+UNIV_INLINE
+void
+read_view_remove(
+/*=============*/
+	read_view_t*	view,		/*!< in: read view, can be 0 */
+	bool		own_mutex)	/*!< in: true if caller owns the
+					trx_sys_t::mutex */
+{
+	if (view == 0) {
+
+		return;
+	}
+
+	const bool	lock_here = !own_mutex;
+
+	if (lock_here) {
+		mutex_enter(&trx_sys->mutex);
+	}
+
+	/* Debug checks: the view before, and the whole list after, the
+	removal. */
+	ut_ad(read_view_validate(view));
+
+	UT_LIST_REMOVE(view_list, trx_sys->view_list, view);
+
+	ut_ad(read_view_list_validate());
+
+	if (lock_here) {
+		mutex_exit(&trx_sys->mutex);
+	}
+}
+
diff --git a/storage/innobase/include/read0types.h b/storage/innobase/include/read0types.h
new file mode 100644
index 00000000000..969f4ebb637
--- /dev/null
+++ b/storage/innobase/include/read0types.h
@@ -0,0 +1,32 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/read0types.h
+Cursor read
+
+Created 2/16/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef read0types_h
+#define read0types_h
+
+struct read_view_t;
+struct cursor_view_t;
+
+#endif
diff --git a/storage/innobase/include/rem0cmp.h b/storage/innobase/include/rem0cmp.h
new file mode 100644
index 00000000000..cb3c85ac2c8
--- /dev/null
+++ b/storage/innobase/include/rem0cmp.h
@@ -0,0 +1,301 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/rem0cmp.h
+Comparison services for records
+
+Created 7/1/1994 Heikki Tuuri
+************************************************************************/
+
+#ifndef rem0cmp_h
+#define rem0cmp_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "data0type.h"
+#include "dict0dict.h"
+#include "rem0rec.h"
+
+/*************************************************************//**
+Returns TRUE if two columns are equal for comparison purposes.
+@return	TRUE if the columns are considered equal in comparisons */
+UNIV_INTERN
+ibool
+cmp_cols_are_equal(
+/*===============*/
+	const dict_col_t*	col1,	/*!< in: column 1 */
+	const dict_col_t*	col2,	/*!< in: column 2 */
+	ibool			check_charsets);
+					/*!< in: whether to check charsets */
+/*************************************************************//**
+This function is used to compare two data fields for which we know the
+data type.
+@return	1, 0, -1, if data1 is greater, equal, less than data2, respectively */
+UNIV_INLINE
+int
+cmp_data_data(
+/*==========*/
+	ulint		mtype,	/*!< in: main type */
+	ulint		prtype,	/*!< in: precise type */
+	const byte*	data1,	/*!< in: data field (== a pointer to a memory
+				buffer) */
+	ulint		len1,	/*!< in: data field length or UNIV_SQL_NULL */
+	const byte*	data2,	/*!< in: data field (== a pointer to a memory
+				buffer) */
+	ulint		len2);	/*!< in: data field length or UNIV_SQL_NULL */
+/*************************************************************//**
+This function is used to compare two data fields for which we know the
+data type.
+@return	1, 0, -1, if data1 is greater, equal, less than data2, respectively */
+UNIV_INTERN
+int
+cmp_data_data_slow(
+/*===============*/
+	ulint		mtype,	/*!< in: main type */
+	ulint		prtype,	/*!< in: precise type */
+	const byte*	data1,	/*!< in: data field (== a pointer to a memory
+				buffer) */
+	ulint		len1,	/*!< in: data field length or UNIV_SQL_NULL */
+	const byte*	data2,	/*!< in: data field (== a pointer to a memory
+				buffer) */
+	ulint		len2);	/*!< in: data field length or UNIV_SQL_NULL */
+
+/*************************************************************//**
+This function is used to compare two data fields for which we know the
+data type to be VARCHAR.
+@return	1, 0, -1, if lhs is greater, equal, less than rhs, respectively */
+UNIV_INTERN
+int
+cmp_data_data_slow_varchar(
+/*=======================*/
+	const byte*	lhs,	/*!< in: data field (== a pointer to a memory
+				buffer) */
+	ulint		lhs_len,/*!< in: data field length or UNIV_SQL_NULL */
+	const byte*	rhs,	/*!< in: data field (== a pointer to a memory
+				buffer) */
+	ulint		rhs_len);/*!< in: data field length or UNIV_SQL_NULL */
+/*************************************************************//**
+This function is used to compare two varchar/char fields. The comparison
+is for the LIKE operator.
+@return	1, 0, -1, if data1 is greater, equal, less than data2, respectively */
+UNIV_INTERN
+int
+cmp_data_data_slow_like_prefix(
+/*===========================*/
+	const byte*	data1,	/*!< in: data field (== a pointer to a memory
+				buffer) */
+	ulint		len1,	/*!< in: data field length or UNIV_SQL_NULL */
+	const byte*	data2,	/*!< in: data field (== a pointer to a memory
+				buffer) */
+	ulint		len2);	/*!< in: data field length or UNIV_SQL_NULL */
+/*************************************************************//**
+This function is used to compare two varchar/char fields. The comparison
+is for the LIKE operator.
+@return	1, 0, -1, if data1 is greater, equal, less than data2, respectively */
+UNIV_INTERN
+int
+cmp_data_data_slow_like_suffix(
+/*===========================*/
+	const byte*	data1,	/*!< in: data field (== a pointer to a memory
+				buffer) */
+	ulint		len1,	/*!< in: data field length or UNIV_SQL_NULL */
+	const byte*	data2,	/*!< in: data field (== a pointer to a memory
+				buffer) */
+	ulint		len2);	/*!< in: data field length or UNIV_SQL_NULL */
+/*************************************************************//**
+This function is used to compare two varchar/char fields. The comparison
+is for the LIKE operator.
+@return	1, 0, -1, if data1 is greater, equal, less than data2, respectively */
+UNIV_INTERN
+int
+cmp_data_data_slow_like_substr(
+/*===========================*/
+	const byte*	data1,	/*!< in: data field (== a pointer to a memory
+				buffer) */
+	ulint		len1,	/*!< in: data field length or UNIV_SQL_NULL */
+	const byte*	data2,	/*!< in: data field (== a pointer to a memory
+				buffer) */
+	ulint		len2);	/*!< in: data field length or UNIV_SQL_NULL */
+/*************************************************************//**
+This function is used to compare two dfields where at least the first
+has its data type field set.
+@return 1, 0, -1, if dfield1 is greater, equal, less than dfield2,
+respectively */
+UNIV_INLINE
+int
+cmp_dfield_dfield(
+/*==============*/
+	const dfield_t*	dfield1,/*!< in: data field; must have type field set */
+	const dfield_t*	dfield2);/*!< in: data field */
+/*************************************************************//**
+This function is used to compare a data tuple to a physical record.
+Only dtuple->n_fields_cmp first fields are taken into account for
+the data tuple! If we denote by n = n_fields_cmp, then rec must
+have either m >= n fields, or it must differ from dtuple in some of
+the m fields rec has. If rec has an externally stored field we do not
+compare it but return with value 0 if such a comparison should be
+made.
+@return 1, 0, -1, if dtuple is greater, equal, less than rec,
+respectively, when only the common first fields are compared, or until
+the first externally stored field in rec */
+UNIV_INTERN
+int
+cmp_dtuple_rec_with_match_low(
+/*==========================*/
+	const dtuple_t*	dtuple,	/*!< in: data tuple */
+	const rec_t*	rec,	/*!< in: physical record which differs from
+				dtuple in some of the common fields, or which
+				has an equal number or more fields than
+				dtuple */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint		n_cmp,	/*!< in: number of fields to compare */
+	ulint*		matched_fields,
+				/*!< in/out: number of already completely
+				matched fields; when function returns,
+				contains the value for current comparison */
+	ulint*		matched_bytes)
+				/*!< in/out: number of already matched
+				bytes within the first field not completely
+				matched; when function returns, contains the
+				value for current comparison */
+	__attribute__((nonnull));
+#define cmp_dtuple_rec_with_match(tuple,rec,offsets,fields,bytes)	\
+	cmp_dtuple_rec_with_match_low(					\
+		tuple,rec,offsets,dtuple_get_n_fields_cmp(tuple),fields,bytes)
+/**************************************************************//**
+Compares a data tuple to a physical record.
+@see cmp_dtuple_rec_with_match
+@return 1, 0, -1, if dtuple is greater, equal, less than rec, respectively */
+UNIV_INTERN
+int
+cmp_dtuple_rec(
+/*===========*/
+	const dtuple_t*	dtuple,	/*!< in: data tuple */
+	const rec_t*	rec,	/*!< in: physical record */
+	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
+/**************************************************************//**
+Checks if a dtuple is a prefix of a record. The last field in dtuple
+is allowed to be a prefix of the corresponding field in the record.
+@return	TRUE if prefix */
+UNIV_INTERN
+ibool
+cmp_dtuple_is_prefix_of_rec(
+/*========================*/
+	const dtuple_t*	dtuple,	/*!< in: data tuple */
+	const rec_t*	rec,	/*!< in: physical record */
+	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
+/*************************************************************//**
+Compare two physical records that contain the same number of columns,
+none of which are stored externally.
+@retval 1 if rec1 (including non-ordering columns) is greater than rec2
+@retval -1 if rec1 (including non-ordering columns) is less than rec2
+@retval 0 if rec1 is a duplicate of rec2 */
+UNIV_INTERN
+int
+cmp_rec_rec_simple(
+/*===============*/
+	const rec_t*		rec1,	/*!< in: physical record */
+	const rec_t*		rec2,	/*!< in: physical record */
+	const ulint*		offsets1,/*!< in: rec_get_offsets(rec1, ...) */
+	const ulint*		offsets2,/*!< in: rec_get_offsets(rec2, ...) */
+	const dict_index_t*	index,	/*!< in: data dictionary index */
+	struct TABLE*		table)	/*!< in: MySQL table, for reporting
+					duplicate key value if applicable,
+					or NULL */
+	__attribute__((nonnull(1,2,3,4), warn_unused_result));
+/*************************************************************//**
+This function is used to compare two physical records. Only the common
+first fields are compared, and if an externally stored field is
+encountered, then 0 is returned.
+@return 1, 0, -1 if rec1 is greater, equal, less, respectively */
+UNIV_INTERN
+int
+cmp_rec_rec_with_match(
+/*===================*/
+	const rec_t*	rec1,	/*!< in: physical record */
+	const rec_t*	rec2,	/*!< in: physical record */
+	const ulint*	offsets1,/*!< in: rec_get_offsets(rec1, index) */
+	const ulint*	offsets2,/*!< in: rec_get_offsets(rec2, index) */
+	dict_index_t*	index,	/*!< in: data dictionary index */
+	ibool		nulls_unequal,
+				/*!< in: TRUE if this is for index statistics
+				cardinality estimation, and innodb_stats_method
+				is "nulls_unequal" or "nulls_ignored" */
+	ulint*		matched_fields, /*!< in/out: number of already completely
+				matched fields; when the function returns,
+				contains the value for the current
+				comparison */
+	ulint*		matched_bytes);/*!< in/out: number of already matched
+				bytes within the first field not completely
+				matched; when the function returns, contains
+				the value for the current comparison */
+/*************************************************************//**
+This function is used to compare two physical records. Only the common
+first fields are compared.
+@return 1, 0 , -1 if rec1 is greater, equal, less, respectively, than
+rec2; only the common first fields are compared */
+UNIV_INLINE
+int
+cmp_rec_rec(
+/*========*/
+	const rec_t*	rec1,	/*!< in: physical record */
+	const rec_t*	rec2,	/*!< in: physical record */
+	const ulint*	offsets1,/*!< in: rec_get_offsets(rec1, index) */
+	const ulint*	offsets2,/*!< in: rec_get_offsets(rec2, index) */
+	dict_index_t*	index);	/*!< in: data dictionary index */
+
+/*************************************************************//**
+This function is used to compare two dfields where at least the first
+has its data type field set.
+@return 1, 0, -1, if dfield1 is greater, equal, less than dfield2,
+respectively */
+UNIV_INTERN
+int
+cmp_dfield_dfield_like_prefix(
+/*==========================*/
+	dfield_t*	dfield1,/*!< in: data field; must have type field set */
+	dfield_t*	dfield2);/*!< in: data field */
+/*************************************************************//**
+This function is used to compare two dfields where at least the first
+has its data type field set.
+@return 1, 0, -1, if dfield1 is greater, equal, less than dfield2,
+respectively */
+UNIV_INLINE
+int
+cmp_dfield_dfield_like_substr(
+/*==========================*/
+	dfield_t*	dfield1,/*!< in: data field; must have type field set */
+	dfield_t*	dfield2);/*!< in: data field */
+/*************************************************************//**
+This function is used to compare two dfields where at least the first
+has its data type field set.
+@return 1, 0, -1, if dfield1 is greater, equal, less than dfield2,
+respectively */
+UNIV_INLINE
+int
+cmp_dfield_dfield_like_suffix(
+/*==========================*/
+	dfield_t*	dfield1,/*!< in: data field; must have type field set */
+	dfield_t*	dfield2);/*!< in: data field */
+
+#ifndef UNIV_NONINL
+#include "rem0cmp.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/rem0cmp.ic b/storage/innobase/include/rem0cmp.ic
new file mode 100644
index 00000000000..67a2dcacba1
--- /dev/null
+++ b/storage/innobase/include/rem0cmp.ic
@@ -0,0 +1,186 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/rem0cmp.ic
+Comparison services for records
+
+Created 7/1/1994 Heikki Tuuri
+************************************************************************/
+
+/*************************************************************//**
+Compares two data fields whose data type is known. This inline entry
+point forwards all of its arguments unchanged to cmp_data_data_slow().
+@return	1, 0, -1, if data1 is greater, equal, less than data2, respectively */
+UNIV_INLINE
+int
+cmp_data_data(
+/*==========*/
+	ulint		mtype,	/*!< in: main type */
+	ulint		prtype,	/*!< in: precise type */
+	const byte*	data1,	/*!< in: first data field (pointer to a
+				memory buffer) */
+	ulint		len1,	/*!< in: length of data1, or UNIV_SQL_NULL */
+	const byte*	data2,	/*!< in: second data field (pointer to a
+				memory buffer) */
+	ulint		len2)	/*!< in: length of data2, or UNIV_SQL_NULL */
+{
+	const int	result = cmp_data_data_slow(
+		mtype, prtype, data1, len1, data2, len2);
+
+	return(result);
+}
+
+/*************************************************************//**
+Compares two (CHAR) data fields for the LIKE operator by delegating to
+cmp_data_data_slow_like_prefix().
+@return 1, 0, -1, if data1 is greater, equal, less than data2,
+respectively */
+UNIV_INLINE
+int
+cmp_data_data_like_prefix(
+/*======================*/
+	byte*           data1,  /*!< in: first data field (pointer to a
+				memory buffer) */
+	ulint           len1,   /*!< in: length of data1, or UNIV_SQL_NULL */
+	byte*           data2,  /*!< in: second data field (pointer to a
+				memory buffer) */
+	ulint           len2)   /*!< in: length of data2, or UNIV_SQL_NULL */
+{
+	const int	cmp = cmp_data_data_slow_like_prefix(
+		data1, len1, data2, len2);
+
+	return(cmp);
+}
+/*************************************************************//**
+Compares two (CHAR) data fields for the LIKE operator by delegating to
+cmp_data_data_slow_like_suffix().
+@return 1, 0, -1, if data1 is greater, equal, less than data2,
+respectively */
+UNIV_INLINE
+int
+cmp_data_data_like_suffix(
+/*======================*/
+	byte*           data1,  /*!< in: first data field (pointer to a
+				memory buffer) */
+	ulint           len1,   /*!< in: length of data1, or UNIV_SQL_NULL */
+	byte*           data2,  /*!< in: second data field (pointer to a
+				memory buffer) */
+	ulint           len2)   /*!< in: length of data2, or UNIV_SQL_NULL */
+{
+	const int	cmp = cmp_data_data_slow_like_suffix(
+		data1, len1, data2, len2);
+
+	return(cmp);
+}
+/*************************************************************//**
+Compares two (CHAR) data fields for the LIKE operator by delegating to
+cmp_data_data_slow_like_substr().
+@return 1, 0, -1, if data1 is greater, equal, less than data2,
+respectively */
+UNIV_INLINE
+int
+cmp_data_data_like_substr(
+/*======================*/
+	byte*           data1,  /*!< in: first data field (pointer to a
+				memory buffer) */
+	ulint           len1,   /*!< in: length of data1, or UNIV_SQL_NULL */
+	byte*           data2,  /*!< in: second data field (pointer to a
+				memory buffer) */
+	ulint           len2)   /*!< in: length of data2, or UNIV_SQL_NULL */
+{
+	const int	cmp = cmp_data_data_slow_like_substr(
+		data1, len1, data2, len2);
+
+	return(cmp);
+}
+/*************************************************************//**
+Compares two dfields where at least the first has its data type field
+set. The main and precise type used for the comparison are taken from
+dfield1 only; the type of dfield2 is not consulted here.
+@return 1, 0, -1, if dfield1 is greater, equal, less than dfield2,
+respectively */
+UNIV_INLINE
+int
+cmp_dfield_dfield(
+/*==============*/
+	const dfield_t*	dfield1,/*!< in: data field; must have type field set */
+	const dfield_t*	dfield2)/*!< in: data field */
+{
+	ut_ad(dfield_check_typed(dfield1));
+
+	const dtype_t*	cmp_type = dfield_get_type(dfield1);
+
+	const byte*	data1 = (const byte*) dfield_get_data(dfield1);
+	const ulint	len1 = dfield_get_len(dfield1);
+	const byte*	data2 = (const byte*) dfield_get_data(dfield2);
+	const ulint	len2 = dfield_get_len(dfield2);
+
+	return(cmp_data_data(cmp_type->mtype, cmp_type->prtype,
+			     data1, len1, data2, len2));
+}
+
+/*************************************************************//**
+Compares two dfields where at least the first has its data type field
+set. The data pointers and lengths of both fields are handed to
+cmp_data_data_like_suffix(); the type of dfield1 is only checked by a
+debug assertion and is not passed on.
+@return 1, 0, -1, if dfield1 is greater, equal, less than dfield2,
+respectively */
+UNIV_INLINE
+int
+cmp_dfield_dfield_like_suffix(
+/*==========================*/
+	dfield_t*       dfield1,/*!< in: data field; must have type field set */
+	dfield_t*       dfield2)/*!< in: data field */
+{
+	ut_ad(dfield_check_typed(dfield1));
+
+	byte*		data1 = (byte*) dfield_get_data(dfield1);
+	const ulint	len1 = dfield_get_len(dfield1);
+	byte*		data2 = (byte*) dfield_get_data(dfield2);
+	const ulint	len2 = dfield_get_len(dfield2);
+
+	return(cmp_data_data_like_suffix(data1, len1, data2, len2));
+}
+
+/*************************************************************//**
+Compares two dfields where at least the first has its data type field
+set. The data pointers and lengths of both fields are handed to
+cmp_data_data_like_substr(); the type of dfield1 is only checked by a
+debug assertion and is not passed on.
+@return 1, 0, -1, if dfield1 is greater, equal, less than dfield2,
+respectively */
+UNIV_INLINE
+int
+cmp_dfield_dfield_like_substr(
+/*==========================*/
+	dfield_t*       dfield1,/*!< in: data field; must have type field set */
+	dfield_t*       dfield2)/*!< in: data field */
+{
+	ut_ad(dfield_check_typed(dfield1));
+
+	byte*		data1 = (byte*) dfield_get_data(dfield1);
+	const ulint	len1 = dfield_get_len(dfield1);
+	byte*		data2 = (byte*) dfield_get_data(dfield2);
+	const ulint	len2 = dfield_get_len(dfield2);
+
+	return(cmp_data_data_like_substr(data1, len1, data2, len2));
+}
+/*************************************************************//**
+Compares two physical records; only the common first fields are
+compared. This is a convenience wrapper around cmp_rec_rec_with_match()
+that passes nulls_unequal = FALSE, starts both match counters at zero
+and discards them afterwards.
+@return 1, 0 , -1 if rec1 is greater, equal, less, respectively, than
+rec2; only the common first fields are compared */
+UNIV_INLINE
+int
+cmp_rec_rec(
+/*========*/
+	const rec_t*	rec1,	/*!< in: physical record */
+	const rec_t*	rec2,	/*!< in: physical record */
+	const ulint*	offsets1,/*!< in: rec_get_offsets(rec1, index) */
+	const ulint*	offsets2,/*!< in: rec_get_offsets(rec2, index) */
+	dict_index_t*	index)	/*!< in: data dictionary index */
+{
+	/* Scratch counters required by the callee; not reported back. */
+	ulint	n_matched_fields = 0;
+	ulint	n_matched_bytes = 0;
+
+	const int	cmp = cmp_rec_rec_with_match(
+		rec1, rec2, offsets1, offsets2, index,
+		FALSE, &n_matched_fields, &n_matched_bytes);
+
+	return(cmp);
+}
diff --git a/storage/innobase/include/rem0rec.h b/storage/innobase/include/rem0rec.h
new file mode 100644
index 00000000000..8e7d5ff2d48
--- /dev/null
+++ b/storage/innobase/include/rem0rec.h
@@ -0,0 +1,988 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/rem0rec.h
+Record manager
+
+Created 5/30/1994 Heikki Tuuri
+*************************************************************************/
+
+#ifndef rem0rec_h
+#define rem0rec_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "rem0types.h"
+#include "mtr0types.h"
+#include "page0types.h"
+
+/* Info bit denoting the predefined minimum record: this bit is set
+if and only if the record is the first user record on a non-leaf
+B-tree page that is the leftmost page on its level
+(PAGE_LEVEL is nonzero and FIL_PAGE_PREV is FIL_NULL). */
+#define REC_INFO_MIN_REC_FLAG	0x10UL
+/* The deleted flag in info bits */
+#define REC_INFO_DELETED_FLAG	0x20UL	/* when bit is set to 1, it means the
+					record has been delete marked */
+
+/* Number of extra bytes in an old-style record,
+in addition to the data and the offsets */
+#define REC_N_OLD_EXTRA_BYTES	6
+/* Number of extra bytes in a new-style record,
+in addition to the data and the offsets */
+#define REC_N_NEW_EXTRA_BYTES	5
+
+/* Record status values */
+#define REC_STATUS_ORDINARY	0
+#define REC_STATUS_NODE_PTR	1
+#define REC_STATUS_INFIMUM	2
+#define REC_STATUS_SUPREMUM	3
+
+/* The following four constants are needed in page0zip.cc in order to
+efficiently compress and decompress pages. */
+
+/* The offset of heap_no in a compact record */
+#define REC_NEW_HEAP_NO		4
+/* The shift of heap_no in a compact record.
+The status is stored in the low-order bits. */
+#define	REC_HEAP_NO_SHIFT	3
+
+/* Length of a B-tree node pointer, in bytes */
+#define REC_NODE_PTR_SIZE	4
+
+/** SQL null flag in a 1-byte offset of ROW_FORMAT=REDUNDANT records */
+#define REC_1BYTE_SQL_NULL_MASK	0x80UL
+/** SQL null flag in a 2-byte offset of ROW_FORMAT=REDUNDANT records */
+#define REC_2BYTE_SQL_NULL_MASK	0x8000UL
+
+/** In a 2-byte offset of ROW_FORMAT=REDUNDANT records, the second most
+significant bit denotes that the tail of a field is stored off-page. */
+#define REC_2BYTE_EXTERN_MASK	0x4000UL
+
+#ifdef UNIV_DEBUG
+/* Length of the rec_get_offsets() header */
+# define REC_OFFS_HEADER_SIZE	4
+#else /* UNIV_DEBUG */
+/* Length of the rec_get_offsets() header */
+# define REC_OFFS_HEADER_SIZE	2
+#endif /* UNIV_DEBUG */
+
+/* Number of elements that should be initially allocated for the
+offsets[] array, first passed to rec_get_offsets() */
+#define REC_OFFS_NORMAL_SIZE	100
+#define REC_OFFS_SMALL_SIZE	10
+
+/******************************************************//**
+The following function is used to get the pointer of the next chained record
+on the same page.
+@return	pointer to the next chained record, or NULL if none */
+UNIV_INLINE
+const rec_t*
+rec_get_next_ptr_const(
+/*===================*/
+	const rec_t*	rec,	/*!< in: physical record */
+	ulint		comp)	/*!< in: nonzero=compact page format */
+	__attribute__((nonnull, pure, warn_unused_result));
+/******************************************************//**
+The following function is used to get the pointer of the next chained record
+on the same page.
+@return	pointer to the next chained record, or NULL if none */
+UNIV_INLINE
+rec_t*
+rec_get_next_ptr(
+/*=============*/
+	rec_t*	rec,	/*!< in: physical record */
+	ulint	comp)	/*!< in: nonzero=compact page format */
+	__attribute__((nonnull, pure, warn_unused_result));
+/******************************************************//**
+The following function is used to get the offset of the
+next chained record on the same page.
+@return	the page offset of the next chained record, or 0 if none */
+UNIV_INLINE
+ulint
+rec_get_next_offs(
+/*==============*/
+	const rec_t*	rec,	/*!< in: physical record */
+	ulint		comp)	/*!< in: nonzero=compact page format */
+	__attribute__((nonnull, pure, warn_unused_result));
+/******************************************************//**
+The following function is used to set the next record offset field
+of an old-style record. */
+UNIV_INLINE
+void
+rec_set_next_offs_old(
+/*==================*/
+	rec_t*	rec,	/*!< in/out: old-style physical record */
+	ulint	next)	/*!< in: offset of the next record */
+	__attribute__((nonnull));
+/******************************************************//**
+The following function is used to set the next record offset field
+of a new-style record. */
+UNIV_INLINE
+void
+rec_set_next_offs_new(
+/*==================*/
+	rec_t*	rec,	/*!< in/out: new-style physical record */
+	ulint	next)	/*!< in: offset of the next record */
+	__attribute__((nonnull));
+/******************************************************//**
+The following function is used to get the number of fields
+in an old-style record.
+@return	number of data fields */
+UNIV_INLINE
+ulint
+rec_get_n_fields_old(
+/*=================*/
+	const rec_t*	rec)	/*!< in: physical record */
+	__attribute__((nonnull, pure, warn_unused_result));
+/******************************************************//**
+The following function is used to get the number of fields
+in a record.
+@return	number of data fields */
+UNIV_INLINE
+ulint
+rec_get_n_fields(
+/*=============*/
+	const rec_t*		rec,	/*!< in: physical record */
+	const dict_index_t*	index)	/*!< in: record descriptor */
+	__attribute__((nonnull, pure, warn_unused_result));
+/******************************************************//**
+The following function is used to get the number of records owned by the
+previous directory record.
+@return	number of owned records */
+UNIV_INLINE
+ulint
+rec_get_n_owned_old(
+/*================*/
+	const rec_t*	rec)	/*!< in: old-style physical record */
+	__attribute__((nonnull, pure, warn_unused_result));
+/******************************************************//**
+The following function is used to set the number of owned records
+in an old-style record. */
+UNIV_INLINE
+void
+rec_set_n_owned_old(
+/*================*/
+	rec_t*	rec,		/*!< in/out: old-style physical record */
+	ulint	n_owned)	/*!< in: the number of owned */
+	__attribute__((nonnull));
+/******************************************************//**
+The following function is used to get the number of records owned by the
+previous directory record.
+@return	number of owned records */
+UNIV_INLINE
+ulint
+rec_get_n_owned_new(
+/*================*/
+	const rec_t*	rec)	/*!< in: new-style physical record */
+	__attribute__((nonnull, pure, warn_unused_result));
+/******************************************************//**
+The following function is used to set the number of owned records. */
+UNIV_INLINE
+void
+rec_set_n_owned_new(
+/*================*/
+	rec_t*		rec,	/*!< in/out: new-style physical record */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	ulint		n_owned)/*!< in: the number of owned */
+	__attribute__((nonnull(1)));
+/******************************************************//**
+The following function is used to retrieve the info bits of
+a record.
+@return	info bits */
+UNIV_INLINE
+ulint
+rec_get_info_bits(
+/*==============*/
+	const rec_t*	rec,	/*!< in: physical record */
+	ulint		comp)	/*!< in: nonzero=compact page format */
+	__attribute__((nonnull, pure, warn_unused_result));
+/******************************************************//**
+The following function is used to set the info bits of an old-style
+record. */
+UNIV_INLINE
+void
+rec_set_info_bits_old(
+/*==================*/
+	rec_t*	rec,	/*!< in/out: old-style physical record */
+	ulint	bits)	/*!< in: info bits */
+	__attribute__((nonnull));
+/******************************************************//**
+The following function is used to set the info bits of a record. */
+UNIV_INLINE
+void
+rec_set_info_bits_new(
+/*==================*/
+	rec_t*	rec,	/*!< in/out: new-style physical record */
+	ulint	bits)	/*!< in: info bits */
+	__attribute__((nonnull));
+/******************************************************//**
+The following function retrieves the status bits of a new-style record.
+@return	status bits */
+UNIV_INLINE
+ulint
+rec_get_status(
+/*===========*/
+	const rec_t*	rec)	/*!< in: physical record */
+	__attribute__((nonnull, pure, warn_unused_result));
+
+/******************************************************//**
+The following function is used to set the status bits of a new-style record. */
+UNIV_INLINE
+void
+rec_set_status(
+/*===========*/
+	rec_t*	rec,	/*!< in/out: physical record */
+	ulint	bits)	/*!< in: status bits */
+	__attribute__((nonnull));
+
+/******************************************************//**
+The following function is used to retrieve the info and status
+bits of a record.  (Only compact records have status bits.)
+@return	info and status bits */
+UNIV_INLINE
+ulint
+rec_get_info_and_status_bits(
+/*=========================*/
+	const rec_t*	rec,	/*!< in: physical record */
+	ulint		comp)	/*!< in: nonzero=compact page format */
+	__attribute__((nonnull, pure, warn_unused_result));
+/******************************************************//**
+The following function is used to set the info and status
+bits of a record.  (Only compact records have status bits.) */
+UNIV_INLINE
+void
+rec_set_info_and_status_bits(
+/*=========================*/
+	rec_t*	rec,	/*!< in/out: compact physical record */
+	ulint	bits)	/*!< in: info and status bits */
+	__attribute__((nonnull));
+
+/******************************************************//**
+The following function tells if record is delete marked.
+@return	nonzero if delete marked */
+UNIV_INLINE
+ulint
+rec_get_deleted_flag(
+/*=================*/
+	const rec_t*	rec,	/*!< in: physical record */
+	ulint		comp)	/*!< in: nonzero=compact page format */
+	__attribute__((nonnull, pure, warn_unused_result));
+/******************************************************//**
+The following function is used to set the deleted bit of an old-style
+record. */
+UNIV_INLINE
+void
+rec_set_deleted_flag_old(
+/*=====================*/
+	rec_t*	rec,	/*!< in/out: old-style physical record */
+	ulint	flag)	/*!< in: nonzero if delete marked */
+	__attribute__((nonnull));
+/******************************************************//**
+The following function is used to set the deleted bit of a new-style record. */
+UNIV_INLINE
+void
+rec_set_deleted_flag_new(
+/*=====================*/
+	rec_t*		rec,	/*!< in/out: new-style physical record */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	ulint		flag)	/*!< in: nonzero if delete marked */
+	__attribute__((nonnull(1)));
+/******************************************************//**
+The following function tells if a new-style record is a node pointer.
+@return	TRUE if node pointer */
+UNIV_INLINE
+ibool
+rec_get_node_ptr_flag(
+/*==================*/
+	const rec_t*	rec)	/*!< in: physical record */
+	__attribute__((nonnull, pure, warn_unused_result));
+/******************************************************//**
+The following function is used to get the order number
+of an old-style record in the heap of the index page.
+@return	heap order number */
+UNIV_INLINE
+ulint
+rec_get_heap_no_old(
+/*================*/
+	const rec_t*	rec)	/*!< in: physical record */
+	__attribute__((nonnull, pure, warn_unused_result));
+/******************************************************//**
+The following function is used to set the heap number
+field in an old-style record. */
+UNIV_INLINE
+void
+rec_set_heap_no_old(
+/*================*/
+	rec_t*	rec,	/*!< in/out: physical record */
+	ulint	heap_no)/*!< in: the heap number */
+	__attribute__((nonnull));
+/******************************************************//**
+The following function is used to get the order number
+of a new-style record in the heap of the index page.
+@return	heap order number */
+UNIV_INLINE
+ulint
+rec_get_heap_no_new(
+/*================*/
+	const rec_t*	rec)	/*!< in: physical record */
+	__attribute__((nonnull, pure, warn_unused_result));
+/******************************************************//**
+The following function is used to set the heap number
+field in a new-style record. */
+UNIV_INLINE
+void
+rec_set_heap_no_new(
+/*================*/
+	rec_t*	rec,	/*!< in/out: physical record */
+	ulint	heap_no)/*!< in: the heap number */
+	__attribute__((nonnull));
+/******************************************************//**
+The following function is used to test whether the data offsets
+in the record are stored in one-byte or two-byte format.
+@return	TRUE if 1-byte form */
+UNIV_INLINE
+ibool
+rec_get_1byte_offs_flag(
+/*====================*/
+	const rec_t*	rec)	/*!< in: physical record */
+	__attribute__((nonnull, pure, warn_unused_result));
+
+/******************************************************//**
+The following function is used to set the 1-byte offsets flag. */
+UNIV_INLINE
+void
+rec_set_1byte_offs_flag(
+/*====================*/
+	rec_t*	rec,	/*!< in/out: physical record */
+	ibool	flag)	/*!< in: TRUE if 1-byte form */
+	__attribute__((nonnull));
+
+/******************************************************//**
+Returns the offset of nth field end if the record is stored in the 1-byte
+offsets form. If the field is SQL null, the flag is ORed in the returned
+value.
+@return	offset of the end of the field, SQL null flag ORed */
+UNIV_INLINE
+ulint
+rec_1_get_field_end_info(
+/*=====================*/
+	const rec_t*	rec,	/*!< in: record */
+	ulint		n)	/*!< in: field index */
+	__attribute__((nonnull, pure, warn_unused_result));
+
+/******************************************************//**
+Returns the offset of nth field end if the record is stored in the 2-byte
+offsets form. If the field is SQL null, the flag is ORed in the returned
+value.
+@return offset of the end of the field, SQL null flag and extern
+storage flag ORed */
+UNIV_INLINE
+ulint
+rec_2_get_field_end_info(
+/*=====================*/
+	const rec_t*	rec,	/*!< in: record */
+	ulint		n)	/*!< in: field index */
+	__attribute__((nonnull, pure, warn_unused_result));
+
+/******************************************************//**
+Returns nonzero if the field is stored off-page.
+@retval 0 if the field is stored in-page
+@retval REC_2BYTE_EXTERN_MASK if the field is stored externally */
+UNIV_INLINE
+ulint
+rec_2_is_field_extern(
+/*==================*/
+	const rec_t*	rec,	/*!< in: record */
+	ulint		n)	/*!< in: field index */
+	__attribute__((nonnull, pure, warn_unused_result));
+
+/******************************************************//**
+Determine how many of the first n columns in a compact
+physical record are stored externally.
+@return	number of externally stored columns */
+UNIV_INTERN
+ulint
+rec_get_n_extern_new(
+/*=================*/
+	const rec_t*		rec,	/*!< in: compact physical record */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	ulint			n)	/*!< in: number of columns to scan */
+	__attribute__((nonnull, warn_unused_result));
+
+/******************************************************//**
+The following function determines the offsets to each field
+in the record.	It can reuse a previously allocated array.
+@return	the new offsets */
+UNIV_INTERN
+ulint*
+rec_get_offsets_func(
+/*=================*/
+	const rec_t*		rec,	/*!< in: physical record */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	ulint*			offsets,/*!< in/out: array consisting of
+					offsets[0] allocated elements,
+					or an array from rec_get_offsets(),
+					or NULL */
+	ulint			n_fields,/*!< in: maximum number of
+					initialized fields
+					 (ULINT_UNDEFINED if all fields) */
+#ifdef UNIV_DEBUG
+	const char*		file,	/*!< in: file name where called */
+	ulint			line,	/*!< in: line number where called */
+#endif /* UNIV_DEBUG */
+	mem_heap_t**		heap)	/*!< in/out: memory heap */
+#ifdef UNIV_DEBUG
+	__attribute__((nonnull(1,2,5,7),warn_unused_result));
+#else /* UNIV_DEBUG */
+	__attribute__((nonnull(1,2,5),warn_unused_result));
+#endif /* UNIV_DEBUG */
+
+#ifdef UNIV_DEBUG
+# define rec_get_offsets(rec,index,offsets,n,heap)			\
+	rec_get_offsets_func(rec,index,offsets,n,__FILE__,__LINE__,heap)
+#else /* UNIV_DEBUG */
+# define rec_get_offsets(rec, index, offsets, n, heap)	\
+	rec_get_offsets_func(rec, index, offsets, n, heap)
+#endif /* UNIV_DEBUG */
+
+/******************************************************//**
+The following function determines the offsets to each field in a compact
+record from its extra bytes in reverse order, and stores them in offsets. */
+UNIV_INTERN
+void
+rec_get_offsets_reverse(
+/*====================*/
+	const byte*		extra,	/*!< in: the extra bytes of a
+					compact record in reverse order,
+					excluding the fixed-size
+					REC_N_NEW_EXTRA_BYTES */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	ulint			node_ptr,/*!< in: nonzero=node pointer,
+					0=leaf node */
+	ulint*			offsets)/*!< in/out: array consisting of
+					offsets[0] allocated elements */
+	__attribute__((nonnull));
+#ifdef UNIV_DEBUG
+/************************************************************//**
+Validates offsets returned by rec_get_offsets().
+@return	TRUE if valid */
+UNIV_INLINE
+ibool
+rec_offs_validate(
+/*==============*/
+	const rec_t*		rec,	/*!< in: record or NULL */
+	const dict_index_t*	index,	/*!< in: record descriptor or NULL */
+	const ulint*		offsets)/*!< in: array returned by
+					rec_get_offsets() */
+	__attribute__((nonnull(3), warn_unused_result));
+/************************************************************//**
+Updates debug data in offsets, in order to avoid bogus
+rec_offs_validate() failures. */
+UNIV_INLINE
+void
+rec_offs_make_valid(
+/*================*/
+	const rec_t*		rec,	/*!< in: record */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	ulint*			offsets)/*!< in/out: array returned by
+					rec_get_offsets() */
+	__attribute__((nonnull));
+#else
+# define rec_offs_make_valid(rec, index, offsets) ((void) 0)
+#endif /* UNIV_DEBUG */
+
+/************************************************************//**
+The following function is used to get the offset to the nth
+data field in an old-style record.
+@return	offset to the field */
+UNIV_INTERN
+ulint
+rec_get_nth_field_offs_old(
+/*=======================*/
+	const rec_t*	rec,	/*!< in: record */
+	ulint		n,	/*!< in: index of the field */
+	ulint*		len)	/*!< out: length of the field; UNIV_SQL_NULL
+				if SQL null */
+	__attribute__((nonnull));
+#define rec_get_nth_field_old(rec, n, len) \
+((rec) + rec_get_nth_field_offs_old(rec, n, len))
+/************************************************************//**
+Gets the physical size of an old-style field.
+Also an SQL null may have a field of size > 0,
+if the data type is of a fixed size.
+@return	field size in bytes */
+UNIV_INLINE
+ulint
+rec_get_nth_field_size(
+/*===================*/
+	const rec_t*	rec,	/*!< in: record */
+	ulint		n)	/*!< in: index of the field */
+	__attribute__((nonnull, pure, warn_unused_result));
+/************************************************************//**
+The following function is used to get an offset to the nth
+data field in a record.
+@return	offset from the origin of rec */
+UNIV_INLINE
+ulint
+rec_get_nth_field_offs(
+/*===================*/
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint		n,	/*!< in: index of the field */
+	ulint*		len)	/*!< out: length of the field; UNIV_SQL_NULL
+				if SQL null */
+	__attribute__((nonnull));
+#define rec_get_nth_field(rec, offsets, n, len) \
+((rec) + rec_get_nth_field_offs(offsets, n, len))
+/******************************************************//**
+Determine if the offsets are for a record in the new
+compact format.
+@return	nonzero if compact format */
+UNIV_INLINE
+ulint
+rec_offs_comp(
+/*==========*/
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+	__attribute__((nonnull, pure, warn_unused_result));
+/******************************************************//**
+Determine if the offsets are for a record containing
+externally stored columns.
+@return	nonzero if externally stored */
+UNIV_INLINE
+ulint
+rec_offs_any_extern(
+/*================*/
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+	__attribute__((nonnull, pure, warn_unused_result));
+/******************************************************//**
+Determine if the offsets are for a record containing null BLOB pointers.
+@return	first field containing a null BLOB pointer, or NULL if none found */
+UNIV_INLINE
+const byte*
+rec_offs_any_null_extern(
+/*=====================*/
+	const rec_t*	rec,		/*!< in: record */
+	const ulint*	offsets)	/*!< in: rec_get_offsets(rec) */
+	__attribute__((nonnull, pure, warn_unused_result));
+/******************************************************//**
+Returns nonzero if the extern bit is set in nth field of rec.
+@return	nonzero if externally stored */
+UNIV_INLINE
+ulint
+rec_offs_nth_extern(
+/*================*/
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint		n)	/*!< in: nth field */
+	__attribute__((nonnull, pure, warn_unused_result));
+/******************************************************//**
+Returns nonzero if the SQL NULL bit is set in nth field of rec.
+@return	nonzero if SQL NULL */
+UNIV_INLINE
+ulint
+rec_offs_nth_sql_null(
+/*==================*/
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint		n)	/*!< in: nth field */
+	__attribute__((nonnull, pure, warn_unused_result));
+/******************************************************//**
+Gets the physical size of a field.
+@return	length of field */
+UNIV_INLINE
+ulint
+rec_offs_nth_size(
+/*==============*/
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint		n)	/*!< in: nth field */
+	__attribute__((nonnull, pure, warn_unused_result));
+
+/******************************************************//**
+Returns the number of extern bits set in a record.
+@return	number of externally stored fields */
+UNIV_INLINE
+ulint
+rec_offs_n_extern(
+/*==============*/
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+	__attribute__((nonnull, pure, warn_unused_result));
+/***********************************************************//**
+This is used to modify the value of an already existing field in a record.
+The previous value must have exactly the same size as the new value. If len
+is UNIV_SQL_NULL then the field is treated as an SQL null.
+For records in ROW_FORMAT=COMPACT (new-style records), len must not be
+UNIV_SQL_NULL unless the field already is SQL null. */
+UNIV_INLINE
+void
+rec_set_nth_field(
+/*==============*/
+	rec_t*		rec,	/*!< in/out: record */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint		n,	/*!< in: index number of the field */
+	const void*	data,	/*!< in: pointer to the data if not SQL null */
+	ulint		len)	/*!< in: length of the data or UNIV_SQL_NULL.
+				If not SQL null, must have the same
+				length as the previous value.
+				If SQL null, previous value must be
+				SQL null. */
+	__attribute__((nonnull(1,2)));
+/**********************************************************//**
+The following function returns the data size of an old-style physical
+record, that is the sum of field lengths. SQL null fields
+are counted as length 0 fields. The value returned by the function
+is the distance from record origin to record end in bytes.
+@return	size */
+UNIV_INLINE
+ulint
+rec_get_data_size_old(
+/*==================*/
+	const rec_t*	rec)	/*!< in: physical record */
+	__attribute__((nonnull, pure, warn_unused_result));
+/**********************************************************//**
+The following function returns the number of allocated elements
+for an array of offsets.
+@return	number of elements */
+UNIV_INLINE
+ulint
+rec_offs_get_n_alloc(
+/*=================*/
+	const ulint*	offsets)/*!< in: array for rec_get_offsets() */
+	__attribute__((nonnull, pure, warn_unused_result));
+/**********************************************************//**
+The following function sets the number of allocated elements
+for an array of offsets. */
+UNIV_INLINE
+void
+rec_offs_set_n_alloc(
+/*=================*/
+	ulint*	offsets,	/*!< out: array for rec_get_offsets(),
+				must be allocated */
+	ulint	n_alloc)	/*!< in: number of elements */
+	__attribute__((nonnull));
+#define rec_offs_init(offsets) \
+	rec_offs_set_n_alloc(offsets, (sizeof offsets) / sizeof *offsets)
+/**********************************************************//**
+The following function returns the number of fields in a record.
+@return	number of fields */
+UNIV_INLINE
+ulint
+rec_offs_n_fields(
+/*==============*/
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+	__attribute__((nonnull, pure, warn_unused_result));
+/**********************************************************//**
+The following function returns the data size of a physical
+record, that is the sum of field lengths. SQL null fields
+are counted as length 0 fields. The value returned by the function
+is the distance from record origin to record end in bytes.
+@return	size */
+UNIV_INLINE
+ulint
+rec_offs_data_size(
+/*===============*/
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+	__attribute__((nonnull, pure, warn_unused_result));
+/**********************************************************//**
+Returns the total size of record minus data size of record.
+The value returned by the function is the distance from record
+start to record origin in bytes.
+@return	size */
+UNIV_INLINE
+ulint
+rec_offs_extra_size(
+/*================*/
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+	__attribute__((nonnull, pure, warn_unused_result));
+/**********************************************************//**
+Returns the total size of a physical record.
+@return	size */
+UNIV_INLINE
+ulint
+rec_offs_size(
+/*==========*/
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+	__attribute__((nonnull, pure, warn_unused_result));
+#ifdef UNIV_DEBUG
+/**********************************************************//**
+Returns a pointer to the start of the record.
+@return	pointer to start */
+UNIV_INLINE
+byte*
+rec_get_start(
+/*==========*/
+	const rec_t*	rec,	/*!< in: pointer to record */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+	__attribute__((nonnull, pure, warn_unused_result));
+/**********************************************************//**
+Returns a pointer to the end of the record.
+@return	pointer to end */
+UNIV_INLINE
+byte*
+rec_get_end(
+/*========*/
+	const rec_t*	rec,	/*!< in: pointer to record */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+	__attribute__((nonnull, pure, warn_unused_result));
+#else /* UNIV_DEBUG */
+# define rec_get_start(rec, offsets) ((rec) - rec_offs_extra_size(offsets))
+# define rec_get_end(rec, offsets) ((rec) + rec_offs_data_size(offsets))
+#endif /* UNIV_DEBUG */
+/***************************************************************//**
+Copies a physical record to a buffer.
+@return	pointer to the origin of the copy */
+UNIV_INLINE
+rec_t*
+rec_copy(
+/*=====*/
+	void*		buf,	/*!< out: buffer */
+	const rec_t*	rec,	/*!< in: physical record */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+	__attribute__((nonnull));
+#ifndef UNIV_HOTBACKUP
+/**********************************************************//**
+Determines the size of a data tuple prefix in a temporary file.
+@return	total size */
+UNIV_INTERN
+ulint
+rec_get_converted_size_temp(
+/*========================*/
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	const dfield_t*		fields,	/*!< in: array of data fields */
+	ulint			n_fields,/*!< in: number of data fields */
+	ulint*			extra)	/*!< out: extra size */
+	__attribute__((warn_unused_result, nonnull));
+
+/******************************************************//**
+Determine the offset to each field in temporary file.
+@see rec_convert_dtuple_to_temp() */
+UNIV_INTERN
+void
+rec_init_offsets_temp(
+/*==================*/
+	const rec_t*		rec,	/*!< in: temporary file record */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	ulint*			offsets)/*!< in/out: array of offsets;
+					in: n=rec_offs_n_fields(offsets) */
+	__attribute__((nonnull));
+
+/*********************************************************//**
+Builds a temporary file record out of a data tuple.
+@see rec_init_offsets_temp() */
+UNIV_INTERN
+void
+rec_convert_dtuple_to_temp(
+/*=======================*/
+	rec_t*			rec,		/*!< out: record */
+	const dict_index_t*	index,		/*!< in: record descriptor */
+	const dfield_t*		fields,		/*!< in: array of data fields */
+	ulint			n_fields)	/*!< in: number of fields */
+	__attribute__((nonnull));
+
+/**************************************************************//**
+Copies the first n fields of a physical record to a new physical record in
+a buffer.
+@return	own: copied record */
+UNIV_INTERN
+rec_t*
+rec_copy_prefix_to_buf(
+/*===================*/
+	const rec_t*		rec,		/*!< in: physical record */
+	const dict_index_t*	index,		/*!< in: record descriptor */
+	ulint			n_fields,	/*!< in: number of fields
+						to copy */
+	byte**			buf,		/*!< in/out: memory buffer
+						for the copied prefix,
+						or NULL */
+	ulint*			buf_size)	/*!< in/out: buffer size */
+	__attribute__((nonnull));
+/************************************************************//**
+Folds a prefix of a physical record to a ulint.
+@return	the folded value */
+UNIV_INLINE
+ulint
+rec_fold(
+/*=====*/
+	const rec_t*	rec,		/*!< in: the physical record */
+	const ulint*	offsets,	/*!< in: array returned by
+					rec_get_offsets() */
+	ulint		n_fields,	/*!< in: number of complete
+					fields to fold */
+	ulint		n_bytes,	/*!< in: number of bytes to fold
+					in an incomplete last field */
+	index_id_t	tree_id)	/*!< in: index tree id */
+	__attribute__((nonnull, pure, warn_unused_result));
+#endif /* !UNIV_HOTBACKUP */
+/*********************************************************//**
+Builds a physical record out of a data tuple and
+stores it into the given buffer.
+@return	pointer to the origin of physical record */
+UNIV_INTERN
+rec_t*
+rec_convert_dtuple_to_rec(
+/*======================*/
+	byte*			buf,	/*!< out: start address of the
+					physical record */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	const dtuple_t*		dtuple,	/*!< in: data tuple */
+	ulint			n_ext)	/*!< in: number of
+					externally stored columns */
+	__attribute__((nonnull, warn_unused_result));
+/**********************************************************//**
+Returns the extra size of an old-style physical record if we know its
+data size and number of fields.
+@return	extra size */
+UNIV_INLINE
+ulint
+rec_get_converted_extra_size(
+/*=========================*/
+	ulint	data_size,	/*!< in: data size */
+	ulint	n_fields,	/*!< in: number of fields */
+	ulint	n_ext)		/*!< in: number of externally stored columns */
+	__attribute__((const));
+/**********************************************************//**
+Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
+@return	total size */
+UNIV_INTERN
+ulint
+rec_get_converted_size_comp_prefix(
+/*===============================*/
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	const dfield_t*		fields,	/*!< in: array of data fields */
+	ulint			n_fields,/*!< in: number of data fields */
+	ulint*			extra)	/*!< out: extra size */
+	__attribute__((warn_unused_result, nonnull(1,2)));
+/**********************************************************//**
+Determines the size of a data tuple in ROW_FORMAT=COMPACT.
+@return	total size */
+UNIV_INTERN
+ulint
+rec_get_converted_size_comp(
+/*========================*/
+	const dict_index_t*	index,	/*!< in: record descriptor;
+					dict_table_is_comp() is
+					assumed to hold, even if
+					it does not */
+	ulint			status,	/*!< in: status bits of the record */
+	const dfield_t*		fields,	/*!< in: array of data fields */
+	ulint			n_fields,/*!< in: number of data fields */
+	ulint*			extra)	/*!< out: extra size */
+	__attribute__((nonnull(1,3)));
+/**********************************************************//**
+The following function returns the size of a data tuple when converted to
+a physical record.
+@return	size */
+UNIV_INLINE
+ulint
+rec_get_converted_size(
+/*===================*/
+	dict_index_t*	index,	/*!< in: record descriptor */
+	const dtuple_t*	dtuple,	/*!< in: data tuple */
+	ulint		n_ext)	/*!< in: number of externally stored columns */
+	__attribute__((warn_unused_result, nonnull));
+#ifndef UNIV_HOTBACKUP
+/**************************************************************//**
+Copies the first n fields of a physical record to a data tuple.
+The fields are copied to the memory heap. */
+UNIV_INTERN
+void
+rec_copy_prefix_to_dtuple(
+/*======================*/
+	dtuple_t*		tuple,		/*!< out: data tuple */
+	const rec_t*		rec,		/*!< in: physical record */
+	const dict_index_t*	index,		/*!< in: record descriptor */
+	ulint			n_fields,	/*!< in: number of fields
+						to copy */
+	mem_heap_t*		heap)		/*!< in: memory heap */
+	__attribute__((nonnull));
+#endif /* !UNIV_HOTBACKUP */
+/***************************************************************//**
+Validates the consistency of a physical record.
+@return	TRUE if ok */
+UNIV_INTERN
+ibool
+rec_validate(
+/*=========*/
+	const rec_t*	rec,	/*!< in: physical record */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+	__attribute__((nonnull));
+/***************************************************************//**
+Prints an old-style physical record. */
+UNIV_INTERN
+void
+rec_print_old(
+/*==========*/
+	FILE*		file,	/*!< in: file where to print */
+	const rec_t*	rec)	/*!< in: physical record */
+	__attribute__((nonnull));
+#ifndef UNIV_HOTBACKUP
+/***************************************************************//**
+Prints a physical record in ROW_FORMAT=COMPACT.  Ignores the
+record header. */
+UNIV_INTERN
+void
+rec_print_comp(
+/*===========*/
+	FILE*		file,	/*!< in: file where to print */
+	const rec_t*	rec,	/*!< in: physical record */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+	__attribute__((nonnull));
+/***************************************************************//**
+Prints a physical record. */
+UNIV_INTERN
+void
+rec_print_new(
+/*==========*/
+	FILE*		file,	/*!< in: file where to print */
+	const rec_t*	rec,	/*!< in: physical record */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+	__attribute__((nonnull));
+/***************************************************************//**
+Prints a physical record. */
+UNIV_INTERN
+void
+rec_print(
+/*======*/
+	FILE*			file,	/*!< in: file where to print */
+	const rec_t*		rec,	/*!< in: physical record */
+	const dict_index_t*	index)	/*!< in: record descriptor */
+	__attribute__((nonnull));
+
+# ifdef UNIV_DEBUG
+/************************************************************//**
+Reads the DB_TRX_ID of a clustered index record.
+@return	the value of DB_TRX_ID */
+UNIV_INTERN
+trx_id_t
+rec_get_trx_id(
+/*===========*/
+	const rec_t*		rec,	/*!< in: record */
+	const dict_index_t*	index)	/*!< in: clustered index */
+	__attribute__((nonnull, warn_unused_result));
+# endif /* UNIV_DEBUG */
+#endif /* !UNIV_HOTBACKUP */
+
+/* Maximum lengths for the data in a physical record if the offsets
+are given in one byte (resp. two byte) format. */
+#define REC_1BYTE_OFFS_LIMIT	0x7FUL
+#define REC_2BYTE_OFFS_LIMIT	0x7FFFUL
+
+/* The data size of a record must be smaller than this because we reserve
+the two uppermost bits in a two-byte offset for special purposes */
+#define REC_MAX_DATA_SIZE	(16 * 1024)
+
+#ifndef UNIV_NONINL
+#include "rem0rec.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/rem0rec.ic b/storage/innobase/include/rem0rec.ic
new file mode 100644
index 00000000000..a539320dd2a
--- /dev/null
+++ b/storage/innobase/include/rem0rec.ic
@@ -0,0 +1,1718 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/rem0rec.ic
+Record manager
+
+Created 5/30/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "mach0data.h"
+#include "ut0byte.h"
+#include "dict0dict.h"
+#include "btr0types.h"
+
+/* Compact flag ORed to the extra size, i.e. *rec_offs_base(offsets) */
+#define REC_OFFS_COMPACT	((ulint) 1 << 31)
+/* SQL NULL flag in the field end offsets, rec_offs_base(offsets)[1 + n] */
+#define REC_OFFS_SQL_NULL	((ulint) 1 << 31)
+/* External flag, tested on the extra size and on the field end offsets */
+#define REC_OFFS_EXTERNAL	((ulint) 1 << 30)
+/* Mask that strips the flag bits from offsets of rec_get_offsets() */
+#define REC_OFFS_MASK		(REC_OFFS_EXTERNAL - 1)
+
+/* Offsets of the bit-fields in an old-style record. NOTE! In the table the
+most significant bytes and bits are written below less significant.
+
+	(1) byte offset		(2) bit usage within byte
+	downward from
+	origin ->	1	8 bits pointer to next record
+			2	8 bits pointer to next record
+			3	1 bit short flag
+				7 bits number of fields
+			4	3 bits number of fields
+				5 bits heap number
+			5	8 bits heap number
+			6	4 bits n_owned
+				4 bits info bits
+*/
+
+/* Offsets of the bit-fields in a new-style record. NOTE! In the table the
+most significant bytes and bits are written below less significant.
+
+	(1) byte offset		(2) bit usage within byte
+	downward from
+	origin ->	1	8 bits relative offset of next record
+			2	8 bits relative offset of next record
+				  the relative offset is an unsigned 16-bit
+				  integer:
+				  (offset_of_next_record
+				   - offset_of_this_record) mod 64Ki,
+				  where mod is the modulo as a non-negative
+				  number;
+				  we can calculate the offset of the next
+				  record with the formula:
+				  relative_offset + offset_of_this_record
+				  mod UNIV_PAGE_SIZE
+			3	3 bits status:
+					000=conventional record
+					001=node pointer record (inside B-tree)
+					010=infimum record
+					011=supremum record
+					1xx=reserved
+				5 bits heap number
+			4	8 bits heap number
+			5	4 bits n_owned
+				4 bits info bits
+*/
+
+/* We list the byte offsets downward from the origin of the record, the
+mask, and the shift needed to obtain each bit-field of the record. */
+
+#define REC_NEXT		2
+#define REC_NEXT_MASK		0xFFFFUL
+#define REC_NEXT_SHIFT		0
+
+#define REC_OLD_SHORT		3	/* This is a single byte bit-field */
+#define REC_OLD_SHORT_MASK	0x1UL
+#define REC_OLD_SHORT_SHIFT	0
+
+#define REC_OLD_N_FIELDS	4	/* This is a two byte bit-field */
+#define REC_OLD_N_FIELDS_MASK	0x7FEUL
+#define REC_OLD_N_FIELDS_SHIFT	1
+
+#define REC_NEW_STATUS		3	/* This is a single byte bit-field */
+#define REC_NEW_STATUS_MASK	0x7UL
+#define REC_NEW_STATUS_SHIFT	0
+
+#define REC_OLD_HEAP_NO		5	/* This is a two byte bit-field */
+#define REC_HEAP_NO_MASK	0xFFF8UL
+#if 0 /* defined in rem0rec.h for use of page0zip.cc */
+#define REC_NEW_HEAP_NO		4
+#define	REC_HEAP_NO_SHIFT	3
+#endif
+
+#define REC_OLD_N_OWNED		6	/* This is a single byte bit-field */
+#define REC_NEW_N_OWNED		5	/* This is a single byte bit-field */
+#define	REC_N_OWNED_MASK	0xFUL
+#define REC_N_OWNED_SHIFT	0
+
+#define REC_OLD_INFO_BITS	6	/* This is a single byte bit-field */
+#define REC_NEW_INFO_BITS	5	/* This is a single byte bit-field */
+#define	REC_INFO_BITS_MASK	0xF0UL
+#define REC_INFO_BITS_SHIFT	0	/* info bits are not shifted down */
+
+#if REC_OLD_SHORT_MASK << (8 * (REC_OLD_SHORT - 3)) \
+		^ REC_OLD_N_FIELDS_MASK << (8 * (REC_OLD_N_FIELDS - 4)) \
+		^ REC_HEAP_NO_MASK << (8 * (REC_OLD_HEAP_NO - 4)) \
+		^ REC_N_OWNED_MASK << (8 * (REC_OLD_N_OWNED - 3)) \
+		^ REC_INFO_BITS_MASK << (8 * (REC_OLD_INFO_BITS - 3)) \
+		^ 0xFFFFFFFFUL
+# error "sum of old-style masks != 0xFFFFFFFFUL"
+#endif /* the XOR of the shifted old-style masks must be 0xFFFFFFFFUL */
+#if REC_NEW_STATUS_MASK << (8 * (REC_NEW_STATUS - 3)) \
+		^ REC_HEAP_NO_MASK << (8 * (REC_NEW_HEAP_NO - 4)) \
+		^ REC_N_OWNED_MASK << (8 * (REC_NEW_N_OWNED - 3)) \
+		^ REC_INFO_BITS_MASK << (8 * (REC_NEW_INFO_BITS - 3)) \
+		^ 0xFFFFFFUL
+# error "sum of new-style masks != 0xFFFFFFUL"
+#endif /* the XOR of the shifted new-style masks must be 0xFFFFFFUL */
+
+/***********************************************************//**
+Sets the value of the ith field SQL null bit of an old-style record. */
+UNIV_INTERN
+void
+rec_set_nth_field_null_bit(
+/*=======================*/
+	rec_t*	rec,	/*!< in/out: old-style record */
+	ulint	i,	/*!< in: index of the field */
+	ibool	val);	/*!< in: value to set */
+/***********************************************************//**
+Sets an old-style record field to SQL null.
+The physical size of the field is not changed. */
+UNIV_INTERN
+void
+rec_set_nth_field_sql_null(
+/*=======================*/
+	rec_t*	rec,	/*!< in/out: old-style record */
+	ulint	n);	/*!< in: index of the field */
+
+/******************************************************//**
+Gets a bit field from the byte at rec - offs: (byte & mask) >> shift. */
+UNIV_INLINE
+ulint
+rec_get_bit_field_1(
+/*================*/
+	const rec_t*	rec,	/*!< in: pointer to record origin */
+	ulint		offs,	/*!< in: offset from the origin down */
+	ulint		mask,	/*!< in: mask used to filter bits */
+	ulint		shift)	/*!< in: shift right applied after masking */
+{
+	ut_ad(rec);
+
+	return((mach_read_from_1(rec - offs) & mask) >> shift);
+}
+
+/******************************************************//**
+Sets a bit field within the byte at rec - offs; bits outside mask are kept. */
+UNIV_INLINE
+void
+rec_set_bit_field_1(
+/*================*/
+	rec_t*	rec,	/*!< in/out: pointer to record origin */
+	ulint	val,	/*!< in: value to set, not yet shifted */
+	ulint	offs,	/*!< in: offset from the origin down */
+	ulint	mask,	/*!< in: mask of the bits to replace */
+	ulint	shift)	/*!< in: shift left applied to val before storing */
+{
+	ut_ad(rec);
+	ut_ad(offs <= REC_N_OLD_EXTRA_BYTES);
+	ut_ad(mask);
+	ut_ad(mask <= 0xFFUL);
+	ut_ad(((mask >> shift) << shift) == mask);
+	ut_ad(((val << shift) & mask) == (val << shift));
+
+	mach_write_to_1(rec - offs,
+			(mach_read_from_1(rec - offs) & ~mask)
+			| (val << shift));
+}
+
+/******************************************************//**
+Gets a bit field from the 2 bytes at rec - offs: (value & mask) >> shift. */
+UNIV_INLINE
+ulint
+rec_get_bit_field_2(
+/*================*/
+	const rec_t*	rec,	/*!< in: pointer to record origin */
+	ulint		offs,	/*!< in: offset from the origin down */
+	ulint		mask,	/*!< in: mask used to filter bits */
+	ulint		shift)	/*!< in: shift right applied after masking */
+{
+	ut_ad(rec);
+
+	return((mach_read_from_2(rec - offs) & mask) >> shift);
+}
+
+/******************************************************//**
+Sets a bit field within the 2 bytes at rec - offs; other bits are kept. */
+UNIV_INLINE
+void
+rec_set_bit_field_2(
+/*================*/
+	rec_t*	rec,	/*!< in/out: pointer to record origin */
+	ulint	val,	/*!< in: value to set, not yet shifted */
+	ulint	offs,	/*!< in: offset from the origin down */
+	ulint	mask,	/*!< in: mask of the bits to replace */
+	ulint	shift)	/*!< in: shift left applied to val before storing */
+{
+	ut_ad(rec);
+	ut_ad(offs <= REC_N_OLD_EXTRA_BYTES);
+	ut_ad(mask > 0xFFUL);
+	ut_ad(mask <= 0xFFFFUL);
+	ut_ad((mask >> shift) & 1);	/* lowest mask bit is at position shift */
+	ut_ad(0 == ((mask >> shift) & ((mask >> shift) + 1)));	/* contiguous */
+	ut_ad(((mask >> shift) << shift) == mask);
+	ut_ad(((val << shift) & mask) == (val << shift));
+
+	mach_write_to_2(rec - offs,
+			(mach_read_from_2(rec - offs) & ~mask)
+			| (val << shift));
+}
+
+/******************************************************//**
+Gets a pointer to the next chained record on the same page, as encoded in
+the 2 bytes at rec - REC_NEXT.
+@return	pointer to the next chained record, or NULL if the field is 0 */
+UNIV_INLINE
+const rec_t*
+rec_get_next_ptr_const(
+/*===================*/
+	const rec_t*	rec,	/*!< in: physical record */
+	ulint		comp)	/*!< in: nonzero=compact page format */
+{
+	ulint	field_value;
+
+	ut_ad(REC_NEXT_MASK == 0xFFFFUL);
+	ut_ad(REC_NEXT_SHIFT == 0);
+
+	field_value = mach_read_from_2(rec - REC_NEXT);
+
+	if (field_value == 0) {
+
+		return(NULL);
+	}
+
+	if (comp) {
+#if UNIV_PAGE_SIZE_MAX <= 32768
+		/* Note that for 64 KiB pages, field_value can 'wrap around'
+		and the debug assertion is not valid */
+
+		/* In the following assertion, field_value is interpreted
+		as a signed 16-bit integer in 2's complement arithmetic.
+		If all platforms defined int16_t in the standard headers,
+		the expression could be written simpler as
+		(int16_t) field_value + ut_align_offset(...) < UNIV_PAGE_SIZE
+		*/
+		ut_ad((field_value >= 32768
+		       ? field_value - 65536
+		       : field_value)
+		      + ut_align_offset(rec, UNIV_PAGE_SIZE)
+		      < UNIV_PAGE_SIZE);
+#endif
+		/* There must be at least REC_N_NEW_EXTRA_BYTES + 1
+		bytes between consecutive records. */
+		ut_ad((field_value > REC_N_NEW_EXTRA_BYTES
+		       && field_value < 32768)
+		      || field_value < (uint16) -REC_N_NEW_EXTRA_BYTES);
+
+		return((byte*) ut_align_down(rec, UNIV_PAGE_SIZE)
+		       + ut_align_offset(rec + field_value, UNIV_PAGE_SIZE));
+	} else {
+		ut_ad(field_value < UNIV_PAGE_SIZE);
+
+		return((byte*) ut_align_down(rec, UNIV_PAGE_SIZE)
+		       + field_value);
+	}
+}
+
+/******************************************************//**
+Non-const variant of rec_get_next_ptr_const(): gets the pointer of the next
+chained record on the same page.
+@return	pointer to the next chained record, or NULL if none */
+UNIV_INLINE
+rec_t*
+rec_get_next_ptr(
+/*=============*/
+	rec_t*	rec,	/*!< in: physical record */
+	ulint	comp)	/*!< in: nonzero=compact page format */
+{
+	return(const_cast<rec_t*>(rec_get_next_ptr_const(rec, comp)));
+}
+
+/******************************************************//**
+Gets the page offset of the next chained record on the same page, as
+encoded in the 2 bytes at rec - REC_NEXT.
+@return	the page offset of the next chained record, or 0 if none */
+UNIV_INLINE
+ulint
+rec_get_next_offs(
+/*==============*/
+	const rec_t*	rec,	/*!< in: physical record */
+	ulint		comp)	/*!< in: nonzero=compact page format */
+{
+	ulint	field_value;
+#if REC_NEXT_MASK != 0xFFFFUL
+# error "REC_NEXT_MASK != 0xFFFFUL"
+#endif
+#if REC_NEXT_SHIFT
+# error "REC_NEXT_SHIFT != 0"
+#endif
+
+	field_value = mach_read_from_2(rec - REC_NEXT);
+
+	if (comp) {
+#if UNIV_PAGE_SIZE_MAX <= 32768
+		/* Note that for 64 KiB pages, field_value can 'wrap around'
+		and the debug assertion is not valid */
+
+		/* In the following assertion, field_value is interpreted
+		as a signed 16-bit integer in 2's complement arithmetic.
+		If all platforms defined int16_t in the standard headers,
+		the expression could be written simpler as
+		(int16_t) field_value + ut_align_offset(...) < UNIV_PAGE_SIZE
+		*/
+		ut_ad((field_value >= 32768
+		       ? field_value - 65536
+		       : field_value)
+		      + ut_align_offset(rec, UNIV_PAGE_SIZE)
+		      < UNIV_PAGE_SIZE);
+#endif
+		if (field_value == 0) {
+
+			return(0);
+		}
+
+		/* There must be at least REC_N_NEW_EXTRA_BYTES + 1
+		bytes between consecutive records. */
+		ut_ad((field_value > REC_N_NEW_EXTRA_BYTES
+		       && field_value < 32768)
+		      || field_value < (uint16) -REC_N_NEW_EXTRA_BYTES);
+
+		return(ut_align_offset(rec + field_value, UNIV_PAGE_SIZE));
+	} else {
+		ut_ad(field_value < UNIV_PAGE_SIZE);
+
+		return(field_value);
+	}
+}
+
+/******************************************************//**
+Sets the next record offset field of an old-style record. The page offset
+next is written unmodified to the 2 bytes at rec - REC_NEXT. */
+UNIV_INLINE
+void
+rec_set_next_offs_old(
+/*==================*/
+	rec_t*	rec,	/*!< in/out: old-style physical record */
+	ulint	next)	/*!< in: page offset of the next record */
+{
+	ut_ad(rec);
+	ut_ad(UNIV_PAGE_SIZE > next);
+#if REC_NEXT_MASK != 0xFFFFUL
+# error "REC_NEXT_MASK != 0xFFFFUL"
+#endif
+#if REC_NEXT_SHIFT
+# error "REC_NEXT_SHIFT != 0"
+#endif
+
+	mach_write_to_2(rec - REC_NEXT, next);
+}
+
+/******************************************************//**
+Sets the next record offset field of a new-style record. A nonzero next is
+stored relative to the page offset of rec, modulo 64Ki; 0 is stored as 0. */
+UNIV_INLINE
+void
+rec_set_next_offs_new(
+/*==================*/
+	rec_t*	rec,	/*!< in/out: new-style physical record */
+	ulint	next)	/*!< in: page offset of the next record */
+{
+	ulint	field_value;
+
+	ut_ad(rec);
+	ut_ad(UNIV_PAGE_SIZE > next);
+
+	if (!next) {
+		field_value = 0;
+	} else {
+		/* The following two statements calculate
+		next - offset_of_rec mod 64Ki, where mod is the modulo
+		as a non-negative number */
+
+		field_value = (ulint)
+			((lint) next
+			 - (lint) ut_align_offset(rec, UNIV_PAGE_SIZE));
+		field_value &= REC_NEXT_MASK;
+	}
+
+	mach_write_to_2(rec - REC_NEXT, field_value);
+}
+
+/******************************************************//**
+Gets the number of fields in an old-style record, read from the
+REC_OLD_N_FIELDS bit-field.
+@return	number of data fields */
+UNIV_INLINE
+ulint
+rec_get_n_fields_old(
+/*=================*/
+	const rec_t*	rec)	/*!< in: old-style physical record */
+{
+	ulint	ret;
+
+	ut_ad(rec);
+
+	ret = rec_get_bit_field_2(rec, REC_OLD_N_FIELDS,
+				  REC_OLD_N_FIELDS_MASK,
+				  REC_OLD_N_FIELDS_SHIFT);
+	ut_ad(ret <= REC_MAX_N_FIELDS);
+	ut_ad(ret > 0);
+
+	return(ret);
+}
+
+/******************************************************//**
+Sets the number of fields in an old-style record, stored in the
+REC_OLD_N_FIELDS bit-field. */
+UNIV_INLINE
+void
+rec_set_n_fields_old(
+/*=================*/
+	rec_t*	rec,		/*!< in/out: old-style physical record */
+	ulint	n_fields)	/*!< in: the number of fields */
+{
+	ut_ad(rec);
+	ut_ad(n_fields <= REC_MAX_N_FIELDS);
+	ut_ad(n_fields > 0);
+
+	rec_set_bit_field_2(rec, n_fields, REC_OLD_N_FIELDS,
+			    REC_OLD_N_FIELDS_MASK, REC_OLD_N_FIELDS_SHIFT);
+}
+
+/******************************************************//**
+Retrieves the status bits of a new-style record (REC_NEW_STATUS bit-field).
+@return	status bits */
+UNIV_INLINE
+ulint
+rec_get_status(
+/*===========*/
+	const rec_t*	rec)	/*!< in: new-style physical record */
+{
+	ulint	ret;
+
+	ut_ad(rec);
+
+	ret = rec_get_bit_field_1(rec, REC_NEW_STATUS,
+				  REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT);
+	ut_ad((ret & ~REC_NEW_STATUS_MASK) == 0);
+
+	return(ret);
+}
+
+/******************************************************//**
+Gets the number of fields in a record. For a compact-format table the count
+is derived from the record status and the index, not read from the record.
+@return	number of data fields */
+UNIV_INLINE
+ulint
+rec_get_n_fields(
+/*=============*/
+	const rec_t*		rec,	/*!< in: physical record */
+	const dict_index_t*	index)	/*!< in: record descriptor */
+{
+	ut_ad(rec);
+	ut_ad(index);
+
+	if (!dict_table_is_comp(index->table)) {
+		return(rec_get_n_fields_old(rec));
+	}
+
+	switch (rec_get_status(rec)) {
+	case REC_STATUS_ORDINARY:
+		return(dict_index_get_n_fields(index));
+	case REC_STATUS_NODE_PTR:
+		return(dict_index_get_n_unique_in_tree(index) + 1);
+	case REC_STATUS_INFIMUM:
+	case REC_STATUS_SUPREMUM:
+		return(1);
+	default:
+		ut_error;
+		return(ULINT_UNDEFINED);
+	}
+}
+
+/******************************************************//**
+Gets the REC_OLD_N_OWNED bit-field of an old-style record: the number of
+records owned by the previous directory record.
+@return	number of owned records */
+UNIV_INLINE
+ulint
+rec_get_n_owned_old(
+/*================*/
+	const rec_t*	rec)	/*!< in: old-style physical record */
+{
+	return(rec_get_bit_field_1(rec, REC_OLD_N_OWNED,
+				   REC_N_OWNED_MASK, REC_N_OWNED_SHIFT));
+}
+
+/******************************************************//**
+Sets the number of owned records (REC_OLD_N_OWNED) of an old-style record. */
+UNIV_INLINE
+void
+rec_set_n_owned_old(
+/*================*/
+	rec_t*	rec,		/*!< in/out: old-style physical record */
+	ulint	n_owned)	/*!< in: the number of owned records */
+{
+	rec_set_bit_field_1(rec, n_owned, REC_OLD_N_OWNED,
+			    REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
+}
+
+/******************************************************//**
+Gets the REC_NEW_N_OWNED bit-field of a new-style record: the number of
+records owned by the previous directory record.
+@return	number of owned records */
+UNIV_INLINE
+ulint
+rec_get_n_owned_new(
+/*================*/
+	const rec_t*	rec)	/*!< in: new-style physical record */
+{
+	return(rec_get_bit_field_1(rec, REC_NEW_N_OWNED,
+				   REC_N_OWNED_MASK, REC_N_OWNED_SHIFT));
+}
+
+/******************************************************//**
+Sets n_owned; also calls page_zip_rec_set_owned() unless rec is supremum. */
+UNIV_INLINE
+void
+rec_set_n_owned_new(
+/*================*/
+	rec_t*		rec,	/*!< in/out: new-style physical record */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	ulint		n_owned)/*!< in: the number of owned records */
+{
+	rec_set_bit_field_1(rec, n_owned, REC_NEW_N_OWNED,
+			    REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
+	if (page_zip && rec_get_status(rec) != REC_STATUS_SUPREMUM) {
+		page_zip_rec_set_owned(page_zip, rec, n_owned);
+	}
+}
+
+/******************************************************//**
+Retrieves the info bits of a record, masked with REC_INFO_BITS_MASK.
+@return	info bits */
+UNIV_INLINE
+ulint
+rec_get_info_bits(
+/*==============*/
+	const rec_t*	rec,	/*!< in: physical record */
+	ulint		comp)	/*!< in: nonzero=compact page format */
+{
+	return(rec_get_bit_field_1(
+		       rec, comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS,
+		       REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT));
+}
+
+/******************************************************//**
+Sets the info bits (REC_OLD_INFO_BITS bit-field) of an old-style record. */
+UNIV_INLINE
+void
+rec_set_info_bits_old(
+/*==================*/
+	rec_t*	rec,	/*!< in/out: old-style physical record */
+	ulint	bits)	/*!< in: info bits */
+{
+	rec_set_bit_field_1(rec, bits, REC_OLD_INFO_BITS,
+			    REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
+}
+/******************************************************//**
+Sets the info bits (REC_NEW_INFO_BITS bit-field) of a new-style record. */
+UNIV_INLINE
+void
+rec_set_info_bits_new(
+/*==================*/
+	rec_t*	rec,	/*!< in/out: new-style physical record */
+	ulint	bits)	/*!< in: info bits */
+{
+	rec_set_bit_field_1(rec, bits, REC_NEW_INFO_BITS,
+			    REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
+}
+
+/******************************************************//**
+Sets the status bits (REC_NEW_STATUS bit-field) of a new-style record. */
+UNIV_INLINE
+void
+rec_set_status(
+/*===========*/
+	rec_t*	rec,	/*!< in/out: physical record */
+	ulint	bits)	/*!< in: status bits */
+{
+	rec_set_bit_field_1(rec, bits, REC_NEW_STATUS,
+			    REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT);
+}
+
+/******************************************************//**
+The following function is used to retrieve the info and status
+bits of a record.  (Only compact records have status bits.)
+@return	info bits, ORed with the status bits if comp is nonzero */
+UNIV_INLINE
+ulint
+rec_get_info_and_status_bits(
+/*=========================*/
+	const rec_t*	rec,	/*!< in: physical record */
+	ulint		comp)	/*!< in: nonzero=compact page format */
+{
+	ulint	bits;
+#if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \
+& (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)
+# error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap"
+#endif
+	if (comp) {
+		bits = rec_get_info_bits(rec, TRUE) | rec_get_status(rec);
+	} else {
+		bits = rec_get_info_bits(rec, FALSE);
+		ut_ad(!(bits & ~(REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)));
+	}
+	return(bits);
+}
+/******************************************************//**
+The following function is used to set the info and status
+bits of a new-style record.  (Only compact records have status bits.) */
+UNIV_INLINE
+void
+rec_set_info_and_status_bits(
+/*=========================*/
+	rec_t*	rec,	/*!< in/out: physical record */
+	ulint	bits)	/*!< in: info bits ORed with status bits */
+{
+#if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \
+& (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)
+# error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap"
+#endif
+	rec_set_status(rec, bits & REC_NEW_STATUS_MASK);
+	rec_set_info_bits_new(rec, bits & ~REC_NEW_STATUS_MASK);
+}
+
+/******************************************************//**
+Tells if a record is delete marked, by testing REC_INFO_DELETED_FLAG.
+@return	nonzero if delete marked */
+UNIV_INLINE
+ulint
+rec_get_deleted_flag(
+/*=================*/
+	const rec_t*	rec,	/*!< in: physical record */
+	ulint		comp)	/*!< in: nonzero=compact page format */
+{
+	if (comp) {
+		return(rec_get_bit_field_1(rec, REC_NEW_INFO_BITS,
+					   REC_INFO_DELETED_FLAG,
+					   REC_INFO_BITS_SHIFT));
+	} else {
+		return(rec_get_bit_field_1(rec, REC_OLD_INFO_BITS,
+					   REC_INFO_DELETED_FLAG,
+					   REC_INFO_BITS_SHIFT));
+	}
+}
+
+/******************************************************//**
+Sets or clears the delete mark in the info bits of an old-style record. */
+UNIV_INLINE
+void
+rec_set_deleted_flag_old(
+/*=====================*/
+	rec_t*	rec,	/*!< in/out: old-style physical record */
+	ulint	flag)	/*!< in: nonzero if delete marked */
+{
+	ulint	val;
+
+	val = rec_get_info_bits(rec, FALSE);
+
+	if (flag) {
+		val |= REC_INFO_DELETED_FLAG;
+	} else {
+		val &= ~REC_INFO_DELETED_FLAG;
+	}
+
+	rec_set_info_bits_old(rec, val);
+}
+
+/******************************************************//**
+Sets or clears the delete mark of a new-style record, and in page_zip. */
+UNIV_INLINE
+void
+rec_set_deleted_flag_new(
+/*=====================*/
+	rec_t*		rec,	/*!< in/out: new-style physical record */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	ulint		flag)	/*!< in: nonzero if delete marked */
+{
+	ulint	val;
+
+	val = rec_get_info_bits(rec, TRUE);
+
+	if (flag) {
+		val |= REC_INFO_DELETED_FLAG;
+	} else {
+		val &= ~REC_INFO_DELETED_FLAG;
+	}
+
+	rec_set_info_bits_new(rec, val);
+
+	if (page_zip) {
+		page_zip_rec_set_deleted(page_zip, rec, flag);
+	}
+}
+
+/******************************************************//**
+Tells if the status of a new-style record is REC_STATUS_NODE_PTR.
+@return	TRUE if node pointer */
+UNIV_INLINE
+ibool
+rec_get_node_ptr_flag(
+/*==================*/
+	const rec_t*	rec)	/*!< in: new-style physical record */
+{
+	return(REC_STATUS_NODE_PTR == rec_get_status(rec));
+}
+
+/******************************************************//**
+Gets the order number of an old-style record in the heap of the index page,
+read from the 2-byte REC_OLD_HEAP_NO bit-field.
+@return	heap order number */
+UNIV_INLINE
+ulint
+rec_get_heap_no_old(
+/*================*/
+	const rec_t*	rec)	/*!< in: old-style physical record */
+{
+	return(rec_get_bit_field_2(rec, REC_OLD_HEAP_NO,
+				   REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT));
+}
+
+/******************************************************//**
+Sets the heap number field (2-byte REC_OLD_HEAP_NO bit-field)
+in an old-style record. */
+UNIV_INLINE
+void
+rec_set_heap_no_old(
+/*================*/
+	rec_t*	rec,	/*!< in/out: old-style physical record */
+	ulint	heap_no)/*!< in: the heap number */
+{
+	rec_set_bit_field_2(rec, heap_no, REC_OLD_HEAP_NO,
+			    REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
+}
+
+/******************************************************//**
+Gets the order number of a new-style record in the heap of the index page,
+read from the 2-byte REC_NEW_HEAP_NO bit-field.
+@return	heap order number */
+UNIV_INLINE
+ulint
+rec_get_heap_no_new(
+/*================*/
+	const rec_t*	rec)	/*!< in: new-style physical record */
+{
+	return(rec_get_bit_field_2(rec, REC_NEW_HEAP_NO,
+				   REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT));
+}
+
+/******************************************************//**
+Sets the heap number field (2-byte REC_NEW_HEAP_NO bit-field)
+in a new-style record. */
+UNIV_INLINE
+void
+rec_set_heap_no_new(
+/*================*/
+	rec_t*	rec,	/*!< in/out: new-style physical record */
+	ulint	heap_no)/*!< in: the heap number */
+{
+	rec_set_bit_field_2(rec, heap_no, REC_NEW_HEAP_NO,
+			    REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
+}
+
+/******************************************************//**
+Tests whether the data offsets in the record are stored in one-byte or
+two-byte format, by reading the REC_OLD_SHORT bit-field.
+@return	TRUE if 1-byte form */
+UNIV_INLINE
+ibool
+rec_get_1byte_offs_flag(
+/*====================*/
+	const rec_t*	rec)	/*!< in: physical record */
+{
+#if TRUE != 1
+#error "TRUE != 1"
+#endif
+
+	return(rec_get_bit_field_1(rec, REC_OLD_SHORT, REC_OLD_SHORT_MASK,
+				   REC_OLD_SHORT_SHIFT));
+}
+
+/******************************************************//**
+Sets the 1-byte offsets flag (REC_OLD_SHORT bit-field) of a record. */
+UNIV_INLINE
+void
+rec_set_1byte_offs_flag(
+/*====================*/
+	rec_t*	rec,	/*!< in/out: physical record */
+	ibool	flag)	/*!< in: TRUE if 1byte form */
+{
+#if TRUE != 1
+#error "TRUE != 1"
+#endif
+	ut_ad(flag <= TRUE);
+
+	rec_set_bit_field_1(rec, flag, REC_OLD_SHORT, REC_OLD_SHORT_MASK,
+			    REC_OLD_SHORT_SHIFT);
+}
+
+/******************************************************//**
+Returns the offset of nth field end if the record is stored in the 1-byte
+offsets form. If the field is SQL null, the flag is ORed in the returned
+value.
+@return	offset of the end of the field, SQL null flag ORed */
+UNIV_INLINE
+ulint
+rec_1_get_field_end_info(
+/*=====================*/
+	const rec_t*	rec,	/*!< in: record */
+	ulint		n)	/*!< in: field index */
+{
+	ut_ad(rec_get_1byte_offs_flag(rec));
+	ut_ad(n < rec_get_n_fields_old(rec));
+
+	return(mach_read_from_1(rec - (REC_N_OLD_EXTRA_BYTES + n + 1)));
+}
+
+/******************************************************//**
+Returns the offset of nth field end if the record is stored in the 2-byte
+offsets form. If the field is SQL null, the flag is ORed in the returned
+value.
+@return offset of the end of the field, SQL null flag and extern
+storage flag ORed */
+UNIV_INLINE
+ulint
+rec_2_get_field_end_info(
+/*=====================*/
+	const rec_t*	rec,	/*!< in: record */
+	ulint		n)	/*!< in: field index */
+{
+	ut_ad(!rec_get_1byte_offs_flag(rec));
+	ut_ad(n < rec_get_n_fields_old(rec));
+
+	return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2)));
+}
+
+/******************************************************//**
+Returns nonzero if the field of a 2-byte offsets form record is off-page.
+@retval 0 if the field is stored in-page
+@retval REC_2BYTE_EXTERN_MASK if the field is stored externally */
+UNIV_INLINE
+ulint
+rec_2_is_field_extern(
+/*==================*/
+	const rec_t*	rec,	/*!< in: record */
+	ulint		n)	/*!< in: field index */
+{
+	return(rec_2_get_field_end_info(rec, n) & REC_2BYTE_EXTERN_MASK);
+}
+
+/* Get the base address of offsets.  The extra_size, with flag bits ORed,
+is stored at this position, and the following positions hold the end
+offsets of the fields. */
+#define rec_offs_base(offsets) (offsets + REC_OFFS_HEADER_SIZE)
+
+/**********************************************************//**
+Returns the number of allocated elements for an array of offsets,
+which is stored in offsets[0].
+@return	number of elements */
+UNIV_INLINE
+ulint
+rec_offs_get_n_alloc(
+/*=================*/
+	const ulint*	offsets)/*!< in: array for rec_get_offsets() */
+{
+	ulint	n_alloc;
+	ut_ad(offsets);
+	n_alloc = offsets[0];
+	ut_ad(n_alloc > REC_OFFS_HEADER_SIZE);
+	UNIV_MEM_ASSERT_W(offsets, n_alloc * sizeof *offsets);
+	return(n_alloc);
+}
+
+/**********************************************************//**
+Sets the number of allocated elements for an array of offsets,
+by storing it in offsets[0]. */
+UNIV_INLINE
+void
+rec_offs_set_n_alloc(
+/*=================*/
+	ulint*	offsets,	/*!< out: array for rec_get_offsets(),
+				must be allocated */
+	ulint	n_alloc)	/*!< in: number of elements */
+{
+	ut_ad(offsets);
+	ut_ad(n_alloc > REC_OFFS_HEADER_SIZE);
+	UNIV_MEM_ASSERT_AND_ALLOC(offsets, n_alloc * sizeof *offsets);
+	offsets[0] = n_alloc;
+}
+
+/**********************************************************//**
+Returns the number of fields in a record, as stored in offsets[1].
+@return	number of fields */
+UNIV_INLINE
+ulint
+rec_offs_n_fields(
+/*==============*/
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+{
+	ulint	n_fields;
+	ut_ad(offsets);
+	n_fields = offsets[1];
+	ut_ad(n_fields > 0);
+	ut_ad(n_fields <= REC_MAX_N_FIELDS);
+	ut_ad(n_fields + REC_OFFS_HEADER_SIZE
+	      <= rec_offs_get_n_alloc(offsets));
+	return(n_fields);
+}
+
+/************************************************************//**
+Validates offsets returned by rec_get_offsets().
+@return	TRUE if valid; a violation fails an assertion instead of returning */
+UNIV_INLINE
+ibool
+rec_offs_validate(
+/*==============*/
+	const rec_t*		rec,	/*!< in: record or NULL */
+	const dict_index_t*	index,	/*!< in: record descriptor or NULL */
+	const ulint*		offsets)/*!< in: array returned by
+					rec_get_offsets() */
+{
+	ulint	i	= rec_offs_n_fields(offsets);
+	ulint	last	= ULINT_MAX;
+	ulint	comp	= *rec_offs_base(offsets) & REC_OFFS_COMPACT;
+
+	if (rec) {
+		ut_ad((ulint) rec == offsets[2]);
+		if (!comp) {
+			ut_a(rec_get_n_fields_old(rec) >= i);
+		}
+	}
+	if (index) {
+		ulint max_n_fields;
+		ut_ad((ulint) index == offsets[3]);
+		max_n_fields = ut_max(
+			dict_index_get_n_fields(index),
+			dict_index_get_n_unique_in_tree(index) + 1);
+		if (comp && rec) {
+			switch (rec_get_status(rec)) {
+			case REC_STATUS_ORDINARY:
+				break;
+			case REC_STATUS_NODE_PTR:
+				max_n_fields = dict_index_get_n_unique_in_tree(
+					index) + 1;
+				break;
+			case REC_STATUS_INFIMUM:
+			case REC_STATUS_SUPREMUM:
+				max_n_fields = 1;
+				break;
+			default:
+				ut_error;
+			}
+		}
+		/* index->n_def == 0 for dummy indexes if !comp */
+		ut_a(!comp || index->n_def);
+		ut_a(!index->n_def || i <= max_n_fields);
+	}
+	while (i--) {	/* the field end offsets must be nondecreasing */
+		ulint	curr = rec_offs_base(offsets)[1 + i] & REC_OFFS_MASK;
+		ut_a(curr <= last);
+		last = curr;
+	}
+	return(TRUE);
+}
+#ifdef UNIV_DEBUG
+/************************************************************//**
+Updates debug data in offsets (offsets[2] and offsets[3]), in order to
+avoid bogus rec_offs_validate() failures. */
+UNIV_INLINE
+void
+rec_offs_make_valid(
+/*================*/
+	const rec_t*		rec,	/*!< in: record */
+	const dict_index_t*	index,	/*!< in: record descriptor */
+	ulint*			offsets)/*!< in/out: array returned by
+					rec_get_offsets() */
+{
+	ut_ad(rec);
+	ut_ad(index);
+	ut_ad(offsets);
+	ut_ad(rec_get_n_fields(rec, index) >= rec_offs_n_fields(offsets));
+	offsets[2] = (ulint) rec;
+	offsets[3] = (ulint) index;
+}
+#endif /* UNIV_DEBUG */
+
+/************************************************************//**
+Gets the offset of the nth data field in a record, and its length.
+Field n starts where field n - 1 ends; field 0 starts at offset 0.
+@return	offset from the origin of rec */
+UNIV_INLINE
+ulint
+rec_get_nth_field_offs(
+/*===================*/
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint		n,	/*!< in: index of the field */
+	ulint*		len)	/*!< out: length of the field; UNIV_SQL_NULL
+				if SQL null */
+{
+	ulint	offs;
+	ulint	length;
+	ut_ad(n < rec_offs_n_fields(offsets));
+	ut_ad(len);
+
+	if (n == 0) {
+		offs = 0;
+	} else {
+		offs = rec_offs_base(offsets)[n] & REC_OFFS_MASK;
+	}
+
+	length = rec_offs_base(offsets)[1 + n];
+
+	if (length & REC_OFFS_SQL_NULL) {
+		length = UNIV_SQL_NULL;
+	} else {
+		length &= REC_OFFS_MASK;
+		length -= offs;
+	}
+
+	*len = length;
+	return(offs);
+}
+
+/******************************************************//**
+Determine if the offsets are for a record in the new
+compact format.
+@return	nonzero (REC_OFFS_COMPACT) if compact format */
+UNIV_INLINE
+ulint
+rec_offs_comp(
+/*==========*/
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+{
+	ut_ad(rec_offs_validate(NULL, NULL, offsets));
+	return(*rec_offs_base(offsets) & REC_OFFS_COMPACT);
+}
+
+/******************************************************//**
+Determine if the offsets are for a record containing
+externally stored columns.
+@return	nonzero (REC_OFFS_EXTERNAL) if externally stored */
+UNIV_INLINE
+ulint
+rec_offs_any_extern(
+/*================*/
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+{
+	ut_ad(rec_offs_validate(NULL, NULL, offsets));
+	return(*rec_offs_base(offsets) & REC_OFFS_EXTERNAL);
+}
+
+/******************************************************//**
+Searches a record for an externally stored field whose trailing
+BTR_EXTERN_FIELD_REF_SIZE bytes equal field_ref_zero, i.e. a null
+BLOB pointer.
+@return	first field containing a null BLOB pointer, or NULL if none found */
+UNIV_INLINE
+const byte*
+rec_offs_any_null_extern(
+/*=====================*/
+	const rec_t*	rec,		/*!< in: record */
+	const ulint*	offsets)	/*!< in: rec_get_offsets(rec) */
+{
+	ut_ad(rec_offs_validate(rec, NULL, offsets));
+
+	if (!rec_offs_any_extern(offsets)) {
+		/* No field is flagged as external: nothing to inspect. */
+		return(NULL);
+	}
+
+	const ulint	n_fields = rec_offs_n_fields(offsets);
+
+	for (ulint i = 0; i < n_fields; i++) {
+		if (!rec_offs_nth_extern(offsets, i)) {
+			continue;
+		}
+
+		ulint		len;
+		const byte*	field
+			= rec_get_nth_field(rec, offsets, i, &len);
+
+		ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+		/* Compare the last BTR_EXTERN_FIELD_REF_SIZE bytes of
+		the field against the all-zero reference. */
+		const byte*	ref = field + len - BTR_EXTERN_FIELD_REF_SIZE;
+
+		if (memcmp(ref, field_ref_zero,
+			   BTR_EXTERN_FIELD_REF_SIZE) == 0) {
+			return(field);
+		}
+	}
+
+	return(NULL);
+}
+
+/******************************************************//**
+Tests the REC_OFFS_EXTERNAL flag stored with the nth field's entry
+in the offsets array.
+@return	nonzero if externally stored */
+UNIV_INLINE
+ulint
+rec_offs_nth_extern(
+/*================*/
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint		n)	/*!< in: nth field */
+{
+	ut_ad(rec_offs_validate(NULL, NULL, offsets));
+	ut_ad(n < rec_offs_n_fields(offsets));
+
+	const ulint	end_info = rec_offs_base(offsets)[1 + n];
+
+	return(end_info & REC_OFFS_EXTERNAL);
+}
+
+/******************************************************//**
+Tests the REC_OFFS_SQL_NULL flag stored with the nth field's entry
+in the offsets array.
+@return	nonzero if SQL NULL */
+UNIV_INLINE
+ulint
+rec_offs_nth_sql_null(
+/*==================*/
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint		n)	/*!< in: nth field */
+{
+	ut_ad(rec_offs_validate(NULL, NULL, offsets));
+	ut_ad(n < rec_offs_n_fields(offsets));
+
+	const ulint	end_info = rec_offs_base(offsets)[1 + n];
+
+	return(end_info & REC_OFFS_SQL_NULL);
+}
+
+/******************************************************//**
+Computes the physical size of the nth field as the distance between
+its end offset and the end offset of the preceding field (or the
+record origin for the first field).
+@return	length of field */
+UNIV_INLINE
+ulint
+rec_offs_nth_size(
+/*==============*/
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint		n)	/*!< in: nth field */
+{
+	ut_ad(rec_offs_validate(NULL, NULL, offsets));
+	ut_ad(n < rec_offs_n_fields(offsets));
+
+	const ulint	end_info = rec_offs_base(offsets)[1 + n];
+
+	if (n == 0) {
+		/* The first field starts at offset 0. */
+		return(end_info & REC_OFFS_MASK);
+	}
+
+	/* Flag bits are stripped by masking the difference. */
+	return((end_info - rec_offs_base(offsets)[n]) & REC_OFFS_MASK);
+}
+
+/******************************************************//**
+Counts the fields whose extern bit is set in the offsets array.
+@return	number of externally stored fields */
+UNIV_INLINE
+ulint
+rec_offs_n_extern(
+/*==============*/
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+{
+	if (!rec_offs_any_extern(offsets)) {
+		/* The record-level flag is clear: skip the scan. */
+		return(0);
+	}
+
+	ulint		n_ext = 0;
+	const ulint	n_fields = rec_offs_n_fields(offsets);
+
+	for (ulint i = 0; i < n_fields; i++) {
+		if (rec_offs_nth_extern(offsets, i)) {
+			n_ext++;
+		}
+	}
+
+	return(n_ext);
+}
+
+/******************************************************//**
+Reads the end info of field n - 1 of a record stored in the 1-byte
+offsets form. If that field is SQL null, the flag is ORed in the returned
+value. This function and the 2-byte counterpart are defined here because the
+C-compiler was not able to sum negative and positive constant offsets, and
+warned of constant arithmetic overflow within the compiler.
+@return	end offset of field n - 1 (where field n starts), SQL null
+flag ORed */
+UNIV_INLINE
+ulint
+rec_1_get_prev_field_end_info(
+/*==========================*/
+	const rec_t*	rec,	/*!< in: record */
+	ulint		n)	/*!< in: field index */
+{
+	ut_ad(rec_get_1byte_offs_flag(rec));
+	ut_ad(n <= rec_get_n_fields_old(rec));
+
+	/* The 1-byte entries are laid out backwards before the origin. */
+	const rec_t*	slot = rec - (REC_N_OLD_EXTRA_BYTES + n);
+
+	return(mach_read_from_1(slot));
+}
+
+/******************************************************//**
+Reads the end info of field n - 1 of a record stored in the 2-byte
+offsets form. If that field is SQL null, the flag is ORed in the returned
+value.
+@return	end offset of field n - 1 (where field n starts), with the
+flag bits ORed */
+UNIV_INLINE
+ulint
+rec_2_get_prev_field_end_info(
+/*==========================*/
+	const rec_t*	rec,	/*!< in: record */
+	ulint		n)	/*!< in: field index */
+{
+	ut_ad(!rec_get_1byte_offs_flag(rec));
+	ut_ad(n <= rec_get_n_fields_old(rec));
+
+	/* The 2-byte entries are laid out backwards before the origin. */
+	const rec_t*	slot = rec - (REC_N_OLD_EXTRA_BYTES + 2 * n);
+
+	return(mach_read_from_2(slot));
+}
+
+/******************************************************//**
+Writes the end info of the nth field of a record stored in the
+1-byte offsets format. */
+UNIV_INLINE
+void
+rec_1_set_field_end_info(
+/*=====================*/
+	rec_t*	rec,	/*!< in: record */
+	ulint	n,	/*!< in: field index */
+	ulint	info)	/*!< in: value to set */
+{
+	ut_ad(rec_get_1byte_offs_flag(rec));
+	ut_ad(n < rec_get_n_fields_old(rec));
+
+	/* Entry for field n sits n + 1 bytes before the extra bytes. */
+	rec_t*	slot = rec - (REC_N_OLD_EXTRA_BYTES + n + 1);
+
+	mach_write_to_1(slot, info);
+}
+
+/******************************************************//**
+Writes the end info of the nth field of a record stored in the
+2-byte offsets format. */
+UNIV_INLINE
+void
+rec_2_set_field_end_info(
+/*=====================*/
+	rec_t*	rec,	/*!< in: record */
+	ulint	n,	/*!< in: field index */
+	ulint	info)	/*!< in: value to set */
+{
+	ut_ad(!rec_get_1byte_offs_flag(rec));
+	ut_ad(n < rec_get_n_fields_old(rec));
+
+	/* Entry for field n sits 2 * (n + 1) bytes before the extra bytes. */
+	rec_t*	slot = rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2);
+
+	mach_write_to_2(slot, info);
+}
+
+/******************************************************//**
+Computes the start offset of the nth field of a record stored in the
+1-byte offsets form: the end info of the previous field with the SQL
+null flag masked out, or 0 for the first field.
+@return	offset of the start of the field */
+UNIV_INLINE
+ulint
+rec_1_get_field_start_offs(
+/*=======================*/
+	const rec_t*	rec,	/*!< in: record */
+	ulint		n)	/*!< in: field index */
+{
+	ut_ad(rec_get_1byte_offs_flag(rec));
+	ut_ad(n <= rec_get_n_fields_old(rec));
+
+	if (n == 0) {
+		return(0);
+	}
+
+	const ulint	prev_end = rec_1_get_prev_field_end_info(rec, n);
+
+	return(prev_end & ~REC_1BYTE_SQL_NULL_MASK);
+}
+
+/******************************************************//**
+Computes the start offset of the nth field of a record stored in the
+2-byte offsets form: the end info of the previous field with the SQL
+null and extern flags masked out, or 0 for the first field.
+@return	offset of the start of the field */
+UNIV_INLINE
+ulint
+rec_2_get_field_start_offs(
+/*=======================*/
+	const rec_t*	rec,	/*!< in: record */
+	ulint		n)	/*!< in: field index */
+{
+	ut_ad(!rec_get_1byte_offs_flag(rec));
+	ut_ad(n <= rec_get_n_fields_old(rec));
+
+	if (n == 0) {
+		return(0);
+	}
+
+	const ulint	prev_end = rec_2_get_prev_field_end_info(rec, n);
+
+	return(prev_end
+	       & ~(REC_2BYTE_SQL_NULL_MASK | REC_2BYTE_EXTERN_MASK));
+}
+
+/******************************************************//**
+Reads the offset of the start of a data field in an old-style record,
+dispatching on the record's 1-byte/2-byte offsets flag. The start of an
+SQL null field is the end offset of the previous non-null field, or 0,
+if none exists. If n is the number of the last field + 1, then the end
+offset of the last field is returned.
+@return	offset of the start of the field */
+UNIV_INLINE
+ulint
+rec_get_field_start_offs(
+/*=====================*/
+	const rec_t*	rec,	/*!< in: record */
+	ulint		n)	/*!< in: field index */
+{
+	ut_ad(rec);
+	ut_ad(n <= rec_get_n_fields_old(rec));
+
+	if (n == 0) {
+		/* The first field always starts at the origin. */
+		return(0);
+	}
+
+	return(rec_get_1byte_offs_flag(rec)
+	       ? rec_1_get_field_start_offs(rec, n)
+	       : rec_2_get_field_start_offs(rec, n));
+}
+
+/************************************************************//**
+Computes the physical size of a field in an old-style record as the
+distance between its start offset and that of the following field.
+Also an SQL null may have a field of size > 0,
+if the data type is of a fixed size.
+@return	field size in bytes */
+UNIV_INLINE
+ulint
+rec_get_nth_field_size(
+/*===================*/
+	const rec_t*	rec,	/*!< in: record */
+	ulint		n)	/*!< in: index of the field */
+{
+	const ulint	os = rec_get_field_start_offs(rec, n);
+	const ulint	next_os = rec_get_field_start_offs(rec, n + 1);
+
+	/* A single field can never span a whole page. */
+	ut_ad(next_os - os < UNIV_PAGE_SIZE);
+
+	return(next_os - os);
+}
+
+/***********************************************************//**
+This is used to modify the value of an already existing field in a record.
+The previous value must have exactly the same size as the new value. If len
+is UNIV_SQL_NULL then the field is treated as an SQL null.
+For records in ROW_FORMAT=COMPACT (new-style records), len must not be
+UNIV_SQL_NULL unless the field already is SQL null. Likewise, a field that
+the offsets array reports as SQL null is only expected to receive a value
+in an old-style record (this is asserted in debug builds). */
+UNIV_INLINE
+void
+rec_set_nth_field(
+/*==============*/
+	rec_t*		rec,	/*!< in: record */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint		n,	/*!< in: index number of the field */
+	const void*	data,	/*!< in: pointer to the data
+				if not SQL null */
+	ulint		len)	/*!< in: length of the data or UNIV_SQL_NULL */
+{
+	byte*	data2;
+	ulint	len2;
+
+	ut_ad(rec);
+	ut_ad(rec_offs_validate(rec, NULL, offsets));
+
+	if (len == UNIV_SQL_NULL) {
+		/* Storing SQL NULL: nothing to do if the field already is
+		NULL; otherwise the record must not be in compact format. */
+		if (!rec_offs_nth_sql_null(offsets, n)) {
+			ut_a(!rec_offs_comp(offsets));
+			rec_set_nth_field_sql_null(rec, n);
+		}
+
+		return;
+	}
+
+	data2 = rec_get_nth_field(rec, offsets, n, &len2);
+	if (len2 == UNIV_SQL_NULL) {
+		/* The offsets array reports the field as SQL NULL: clear
+		its null bit first, then check that the new value has
+		exactly the size of the field in the record. */
+		ut_ad(!rec_offs_comp(offsets));
+		rec_set_nth_field_null_bit(rec, n, FALSE);
+		ut_ad(len == rec_get_nth_field_size(rec, n));
+	} else {
+		ut_ad(len2 == len);
+	}
+
+	ut_memcpy(data2, data, len);
+}
+
+/**********************************************************//**
+Returns the data size of an old-style physical record, that is the
+sum of field lengths. SQL null fields are counted as length 0 fields.
+The value is obtained as the start offset of the (nonexistent) field
+following the last one, i.e. the distance from record origin to record
+end in bytes.
+@return	size */
+UNIV_INLINE
+ulint
+rec_get_data_size_old(
+/*==================*/
+	const rec_t*	rec)	/*!< in: physical record */
+{
+	ut_ad(rec);
+
+	const ulint	n_fields = rec_get_n_fields_old(rec);
+
+	return(rec_get_field_start_offs(rec, n_fields));
+}
+
+/**********************************************************//**
+The following function sets the number of fields in offsets.
+The count is stored in offsets[1]. Debug builds assert that it is
+in the range 1..REC_MAX_N_FIELDS and that, together with
+REC_OFFS_HEADER_SIZE, it fits in the allocated size of the array. */
+UNIV_INLINE
+void
+rec_offs_set_n_fields(
+/*==================*/
+	ulint*	offsets,	/*!< in/out: array returned by
+				rec_get_offsets() */
+	ulint	n_fields)	/*!< in: number of fields */
+{
+	ut_ad(offsets);
+	ut_ad(n_fields > 0);
+	ut_ad(n_fields <= REC_MAX_N_FIELDS);
+	ut_ad(n_fields + REC_OFFS_HEADER_SIZE
+	      <= rec_offs_get_n_alloc(offsets));
+	offsets[1] = n_fields;
+}
+
+/**********************************************************//**
+Returns the data size of a physical record, that is the sum of field
+lengths. SQL null fields are counted as length 0 fields. The value is
+the end offset of the last field, i.e. the distance from record origin
+to record end in bytes.
+@return	size */
+UNIV_INLINE
+ulint
+rec_offs_data_size(
+/*===============*/
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+{
+	ut_ad(rec_offs_validate(NULL, NULL, offsets));
+
+	const ulint	n_fields = rec_offs_n_fields(offsets);
+	const ulint	size = rec_offs_base(offsets)[n_fields]
+		& REC_OFFS_MASK;
+
+	ut_ad(size < UNIV_PAGE_SIZE);
+
+	return(size);
+}
+
+/**********************************************************//**
+Returns the total size of record minus data size of record, read from
+the first base entry of the offsets array with the format flags masked
+out. The value is the distance from record start to record origin
+in bytes.
+@return	size */
+UNIV_INLINE
+ulint
+rec_offs_extra_size(
+/*================*/
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+{
+	ut_ad(rec_offs_validate(NULL, NULL, offsets));
+
+	const ulint	header = rec_offs_base(offsets)[0];
+	const ulint	size = header
+		& ~(REC_OFFS_COMPACT | REC_OFFS_EXTERNAL);
+
+	ut_ad(size < UNIV_PAGE_SIZE);
+
+	return(size);
+}
+
+/**********************************************************//**
+Returns the total size of a physical record: its data size plus
+its extra (header) size.
+@return	size */
+UNIV_INLINE
+ulint
+rec_offs_size(
+/*==========*/
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+{
+	const ulint	data_size = rec_offs_data_size(offsets);
+	const ulint	extra_size = rec_offs_extra_size(offsets);
+
+	return(data_size + extra_size);
+}
+
+#ifdef UNIV_DEBUG
+/**********************************************************//**
+Returns a pointer to the end of the record, i.e. the origin advanced
+by the data size.
+@return	pointer to end */
+UNIV_INLINE
+byte*
+rec_get_end(
+/*========*/
+	const rec_t*	rec,	/*!< in: pointer to record */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+{
+	ut_ad(rec_offs_validate(rec, NULL, offsets));
+
+	const rec_t*	end = rec + rec_offs_data_size(offsets);
+
+	return(const_cast<rec_t*>(end));
+}
+
+/**********************************************************//**
+Returns a pointer to the start of the record, i.e. the origin moved
+back by the extra size.
+@return	pointer to start */
+UNIV_INLINE
+byte*
+rec_get_start(
+/*==========*/
+	const rec_t*	rec,	/*!< in: pointer to record */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+{
+	ut_ad(rec_offs_validate(rec, NULL, offsets));
+
+	const rec_t*	start = rec - rec_offs_extra_size(offsets);
+
+	return(const_cast<rec_t*>(start));
+}
+#endif /* UNIV_DEBUG */
+
+/***************************************************************//**
+Copies a physical record, extra bytes and data alike, to a buffer.
+The buffer must have room for rec_offs_size(offsets) bytes.
+@return	pointer to the origin of the copy */
+UNIV_INLINE
+rec_t*
+rec_copy(
+/*=====*/
+	void*		buf,	/*!< in: buffer */
+	const rec_t*	rec,	/*!< in: physical record */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+{
+	ut_ad(rec && buf);
+	ut_ad(rec_offs_validate(rec, NULL, offsets));
+	ut_ad(rec_validate(rec, offsets));
+
+	const ulint	extra_len = rec_offs_extra_size(offsets);
+	const ulint	total_len = extra_len + rec_offs_data_size(offsets);
+
+	/* The record starts extra_len bytes before its origin. */
+	const rec_t*	start = rec - extra_len;
+
+	ut_memcpy(buf, start, total_len);
+
+	return(static_cast<byte*>(buf) + extra_len);
+}
+
+/**********************************************************//**
+Returns the extra size of an old-style physical record if we know its
+data size and number of fields. One byte per field is used when there
+are no externally stored columns and the data size does not exceed
+REC_1BYTE_OFFS_LIMIT; two bytes per field otherwise.
+@return	extra size */
+UNIV_INLINE
+ulint
+rec_get_converted_extra_size(
+/*=========================*/
+	ulint	data_size,	/*!< in: data size */
+	ulint	n_fields,	/*!< in: number of fields */
+	ulint	n_ext)		/*!< in: number of externally stored columns */
+{
+	const bool	one_byte_offs
+		= !n_ext && data_size <= REC_1BYTE_OFFS_LIMIT;
+	const ulint	bytes_per_field = one_byte_offs ? 1 : 2;
+
+	return(REC_N_OLD_EXTRA_BYTES + bytes_per_field * n_fields);
+}
+
+/**********************************************************//**
+The following function returns the size of a data tuple when converted to
+a physical record. For tables in compact format the computation is
+delegated to rec_get_converted_size_comp(); otherwise the size is the
+tuple's data size plus the old-style extra size. n_ext is only used in
+the old-style computation.
+@return	size */
+UNIV_INLINE
+ulint
+rec_get_converted_size(
+/*===================*/
+	dict_index_t*	index,	/*!< in: record descriptor */
+	const dtuple_t*	dtuple,	/*!< in: data tuple */
+	ulint		n_ext)	/*!< in: number of externally stored columns */
+{
+	ulint	data_size;
+	ulint	extra_size;
+
+	ut_ad(index);
+	ut_ad(dtuple);
+	ut_ad(dtuple_check_typed(dtuple));
+
+	/* Unless the index is a universal one, the tuple must have the
+	field count of a node pointer record (unique fields in the tree
+	plus one) or of a full index record, depending on its status bits. */
+	ut_ad(dict_index_is_univ(index)
+	      || dtuple_get_n_fields(dtuple)
+	      == (((dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK)
+		   == REC_STATUS_NODE_PTR)
+		  ? dict_index_get_n_unique_in_tree(index) + 1
+		  : dict_index_get_n_fields(index)));
+
+	if (dict_table_is_comp(index->table)) {
+		return(rec_get_converted_size_comp(index,
+						   dtuple_get_info_bits(dtuple)
+						   & REC_NEW_STATUS_MASK,
+						   dtuple->fields,
+						   dtuple->n_fields, NULL));
+	}
+
+	/* Old-style record: data size plus the per-field offset bytes. */
+	data_size = dtuple_get_data_size(dtuple, 0);
+
+	extra_size = rec_get_converted_extra_size(
+		data_size, dtuple_get_n_fields(dtuple), n_ext);
+
+#if 0
+	/* This code is inactive since it may be the wrong place to add
+	in the size of node pointers used in parent pages AND it is not
+	currently needed since ha_innobase::max_supported_key_length()
+	ensures that the key size limit for each page size is well below
+	the actual limit ((free space on page / 4) - record overhead).
+	But those limits will need to be raised when InnoDB can
+	support multiple page sizes.  At that time, we will need
+	to consider the node pointer on these universal btrees. */
+
+	if (dict_index_is_univ(index)) {
+		/* This is for the insert buffer B-tree.
+		All fields in the leaf tuple ascend to the
+		parent node plus the child page pointer. */
+
+		/* ibuf cannot contain externally stored fields */
+		ut_ad(n_ext == 0);
+
+		/* Add the data pointer and recompute extra_size
+		based on one more field. */
+		data_size += REC_NODE_PTR_SIZE;
+		extra_size = rec_get_converted_extra_size(
+			data_size,
+			dtuple_get_n_fields(dtuple) + 1,
+			0);
+
+		/* Be sure dtuple->n_fields has this node ptr
+		accounted for.  This function should correspond to
+		what rec_convert_dtuple_to_rec() needs in storage.
+		In optimistic insert or update-not-in-place, we will
+		have to ensure that if the record is converted to a
+		node pointer, it will not become too large.*/
+	}
+#endif
+
+	return(data_size + extra_size);
+}
+
+#ifndef UNIV_HOTBACKUP
+/************************************************************//**
+Folds a prefix of a physical record to a ulint, seeded with the fold of
+the index tree id. Folds only existing fields, that is, checks that we
+do not run out of the record. SQL null fields do not contribute.
+@return	the folded value */
+UNIV_INLINE
+ulint
+rec_fold(
+/*=====*/
+	const rec_t*	rec,		/*!< in: the physical record */
+	const ulint*	offsets,	/*!< in: array returned by
+					rec_get_offsets() */
+	ulint		n_fields,	/*!< in: number of complete
+					fields to fold */
+	ulint		n_bytes,	/*!< in: number of bytes to fold
+					in an incomplete last field */
+	index_id_t	tree_id)	/*!< in: index tree id */
+{
+	ut_ad(rec_offs_validate(rec, NULL, offsets));
+	ut_ad(rec_validate(rec, offsets));
+	ut_ad(n_fields + n_bytes > 0);
+
+	const ulint	n_fields_rec = rec_offs_n_fields(offsets);
+
+	ut_ad(n_fields <= n_fields_rec);
+	ut_ad(n_fields < n_fields_rec || n_bytes == 0);
+
+	/* Clamp the request to the fields the record really has; once
+	all of them are folded there is no partial field left. */
+	if (n_fields >= n_fields_rec) {
+		n_fields = n_fields_rec;
+		n_bytes = 0;
+	}
+
+	ulint	fold = ut_fold_ull(tree_id);
+
+	for (ulint i = 0; i < n_fields; i++) {
+		ulint		len;
+		const byte*	data
+			= rec_get_nth_field(rec, offsets, i, &len);
+
+		if (len == UNIV_SQL_NULL) {
+			continue;
+		}
+
+		fold = ut_fold_ulint_pair(fold, ut_fold_binary(data, len));
+	}
+
+	if (n_bytes == 0) {
+		return(fold);
+	}
+
+	/* Fold at most n_bytes of the field following the complete ones. */
+	ulint		len;
+	const byte*	data = rec_get_nth_field(rec, offsets, n_fields, &len);
+
+	if (len != UNIV_SQL_NULL) {
+		if (len > n_bytes) {
+			len = n_bytes;
+		}
+
+		fold = ut_fold_ulint_pair(fold, ut_fold_binary(data, len));
+	}
+
+	return(fold);
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/rem0types.h b/storage/innobase/include/rem0types.h
new file mode 100644
index 00000000000..f8133f77466
--- /dev/null
+++ b/storage/innobase/include/rem0types.h
@@ -0,0 +1,74 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file include/rem0types.h
+Record manager global types
+
+Created 5/30/1994 Heikki Tuuri
+*************************************************************************/
+
+#ifndef rem0types_h
+#define rem0types_h
+
+/* We define the physical record simply as an array of bytes */
+typedef byte	rec_t;
+
+/* Maximum values for various fields (for non-blob tuples):
+number of fields in a record, heap number, and number of owned records */
+#define REC_MAX_N_FIELDS	(1024 - 1)
+#define REC_MAX_HEAP_NO		(2 * 8192 - 1)
+#define REC_MAX_N_OWNED		(16 - 1)
+
+/* Maximum number of user defined fields/columns. The reserved columns
+are the ones InnoDB adds internally: DB_ROW_ID, DB_TRX_ID, DB_ROLL_PTR.
+We need "* 2" because mlog_parse_index() possibly creates a dummy table
+object with some of the system columns in it, and then adds the 3
+system columns (again) using dict_table_add_system_columns(). The problem
+is that mlog_parse_index() cannot recognize the system columns by
+just having n_fields, n_uniq and the lengths of the columns. */
+#define REC_MAX_N_USER_FIELDS	(REC_MAX_N_FIELDS - DATA_N_SYS_COLS * 2)
+
+/* REC_ANTELOPE_MAX_INDEX_COL_LEN is measured in bytes and is the maximum
+indexed field length (or indexed prefix length) for indexes on tables of
+ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT format.
+Before UTF-8 encodings with mbmaxlen = 4 were supported, a UTF-8 character
+could take at most 3 bytes.  So the limit was set to 3*256, so that one
+can create a column prefix index on 256 characters of a TEXT or VARCHAR
+column also in the UTF-8 charset.
+This constant MUST NOT BE CHANGED, or the compatibility of InnoDB data
+files would be at risk! */
+#define REC_ANTELOPE_MAX_INDEX_COL_LEN		768
+
+/** Maximum indexed field length for table format UNIV_FORMAT_B and
+beyond.
+This (3072) is the maximum index row length allowed, so we cannot create
+an index prefix column longer than that. */
+#define REC_VERSION_56_MAX_INDEX_COL_LEN	3072
+
+/** InnoDB row types are a subset of the MySQL global enum row_type.
+They are made into their own enum so that switch statements can account
+for each of them. */
+enum rec_format_enum {
+	REC_FORMAT_REDUNDANT	= 0,	/*!< REDUNDANT row format */
+	REC_FORMAT_COMPACT	= 1,	/*!< COMPACT row format */
+	REC_FORMAT_COMPRESSED	= 2,	/*!< COMPRESSED row format */
+	REC_FORMAT_DYNAMIC	= 3	/*!< DYNAMIC row format */
+};
+typedef enum rec_format_enum rec_format_t;
+
+#endif
diff --git a/storage/innobase/include/row0ext.h b/storage/innobase/include/row0ext.h
new file mode 100644
index 00000000000..a098e2f9b29
--- /dev/null
+++ b/storage/innobase/include/row0ext.h
@@ -0,0 +1,102 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0ext.h
+Caching of externally stored column prefixes
+
+Created September 2006 Marko Makela
+*******************************************************/
+
+#ifndef row0ext_h
+#define row0ext_h
+
+#include "univ.i"
+#include "row0types.h"
+#include "data0types.h"
+#include "mem0mem.h"
+#include "dict0types.h"
+
+/********************************************************************//**
+Creates a cache of column prefixes of externally stored columns.
+The cache is created in the memory heap passed in.
+@return	own: column prefix cache */
+UNIV_INTERN
+row_ext_t*
+row_ext_create(
+/*===========*/
+	ulint		n_ext,	/*!< in: number of externally stored columns */
+	const ulint*	ext,	/*!< in: col_no's of externally stored columns
+				in the InnoDB table object, as reported by
+				dict_col_get_no(); NOT relative to the records
+				in the clustered index */
+	ulint		flags, /*!< in: table->flags */
+	const dtuple_t*	tuple,	/*!< in: data tuple containing the field
+				references of the externally stored
+				columns; must be indexed by col_no;
+				the clustered index record must be
+				covered by a lock or a page latch
+				to prevent deletion (rollback or purge). */
+	mem_heap_t*	heap);	/*!< in: heap where created */
+
+/********************************************************************//**
+Looks up the cached column prefix of the ith externally stored column.
+@return column prefix, or pointer to field_ref_zero if the cached
+length is 0 (the BLOB could not be fetched to the cache) */
+UNIV_INLINE
+const byte*
+row_ext_lookup_ith(
+/*===============*/
+	const row_ext_t*	ext,	/*!< in: column prefix cache */
+	ulint			i,	/*!< in: index of ext->ext[] */
+	ulint*			len);	/*!< out: length of prefix, in bytes,
+					at most the length determined by
+					DICT_MAX_FIELD_LEN_BY_FORMAT() */
+/********************************************************************//**
+Looks up a column prefix of an externally stored column, searching
+ext->ext[] for the given column number.
+@return column prefix, or NULL if the column is not stored externally,
+or pointer to field_ref_zero if the BLOB pointer is unset */
+UNIV_INLINE
+const byte*
+row_ext_lookup(
+/*===========*/
+	const row_ext_t*	ext,	/*!< in: column prefix cache */
+	ulint			col,	/*!< in: column number in the InnoDB
+					table object, as reported by
+					dict_col_get_no(); NOT relative to the
+					records in the clustered index */
+	ulint*			len);	/*!< out: length of prefix, in bytes,
+					at most the length determined by
+					DICT_MAX_FIELD_LEN_BY_FORMAT();
+					not written when NULL is returned */
+
+/** Prefixes of externally stored columns */
+struct row_ext_t{
+	ulint		n_ext;	/*!< number of externally stored columns */
+	const ulint*	ext;	/*!< col_no's of externally stored columns */
+	byte*		buf;	/*!< backing store of the column prefix cache;
+				the prefix of the ith column starts at
+				buf + i * max_len */
+	ulint		max_len;/*!< maximum prefix length, it could be
+				REC_ANTELOPE_MAX_INDEX_COL_LEN or
+				REC_VERSION_56_MAX_INDEX_COL_LEN depending
+				on row format */
+	ulint		len[1];	/*!< prefix lengths; 0 if not cached;
+				indexed by 0..n_ext-1, so presumably the
+				struct is allocated with room for n_ext
+				entries -- TODO confirm in row_ext_create() */
+};
+
+#ifndef UNIV_NONINL
+#include "row0ext.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/row0ext.ic b/storage/innobase/include/row0ext.ic
new file mode 100644
index 00000000000..39e150d91d5
--- /dev/null
+++ b/storage/innobase/include/row0ext.ic
@@ -0,0 +1,87 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0ext.ic
+Caching of externally stored column prefixes
+
+Created September 2006 Marko Makela
+*******************************************************/
+
+#include "rem0types.h"
+#include "btr0types.h"
+
+/********************************************************************//**
+Looks up the cached column prefix of the ith externally stored column.
+@return column prefix, or pointer to field_ref_zero if the cached
+length is 0 (the BLOB could not be fetched to the cache) */
+UNIV_INLINE
+const byte*
+row_ext_lookup_ith(
+/*===============*/
+	const row_ext_t*	ext,	/*!< in: column prefix cache */
+	ulint			i,	/*!< in: index of ext->ext[] */
+	ulint*			len)	/*!< out: length of prefix, in bytes,
+					at most ext->max_len */
+{
+	ut_ad(ext);
+	ut_ad(len);
+	ut_ad(i < ext->n_ext);
+
+	*len = ext->len[i];
+
+	ut_ad(*len <= ext->max_len);
+	ut_ad(ext->max_len > 0);
+
+	if (*len != 0) {
+		/* Each column owns a max_len sized slot in the buffer. */
+		return(ext->buf + i * ext->max_len);
+	}
+
+	/* The BLOB could not be fetched to the cache. */
+	return(field_ref_zero);
+}
+
+/********************************************************************//**
+Looks up a column prefix of an externally stored column, searching
+ext->ext[] linearly for the given column number.
+@return column prefix, or NULL if the column is not stored externally,
+or pointer to field_ref_zero if the BLOB pointer is unset */
+UNIV_INLINE
+const byte*
+row_ext_lookup(
+/*===========*/
+	const row_ext_t*	ext,	/*!< in: column prefix cache */
+	ulint			col,	/*!< in: column number in the InnoDB
+					table object, as reported by
+					dict_col_get_no(); NOT relative to the
+					records in the clustered index */
+	ulint*			len)	/*!< out: length of prefix, in bytes,
+					at most ext->max_len */
+{
+	ut_ad(ext);
+	ut_ad(len);
+
+	for (ulint pos = 0; pos < ext->n_ext; pos++) {
+		if (ext->ext[pos] != col) {
+			continue;
+		}
+
+		return(row_ext_lookup_ith(ext, pos, len));
+	}
+
+	/* col is not among the externally stored columns. */
+	return(NULL);
+}
diff --git a/storage/innobase/include/row0ftsort.h b/storage/innobase/include/row0ftsort.h
new file mode 100644
index 00000000000..4e04a099140
--- /dev/null
+++ b/storage/innobase/include/row0ftsort.h
@@ -0,0 +1,279 @@
+/*****************************************************************************
+
+Copyright (c) 2010, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0ftsort.h
+Create Full Text Index with (parallel) merge sort
+
+Created 10/13/2010 Jimmy Yang
+*******************************************************/
+
+#ifndef row0ftsort_h
+#define row0ftsort_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "dict0types.h"
+#include "row0mysql.h"
+#include "fts0fts.h"
+#include "fts0types.h"
+#include "fts0priv.h"
+#include "row0merge.h"
+
+/** This structure defines the information that the scan thread will fetch
+and put to the linked list for parallel tokenization/sort threads
+to process */
+typedef struct fts_doc_item     fts_doc_item_t;
+
+/** One document queued by the scan thread for tokenization and sort */
+struct fts_doc_item {
+	dfield_t*	field;		/*!< field contains document string */
+	doc_id_t	doc_id;		/*!< document ID */
+	UT_LIST_NODE_T(fts_doc_item_t)	doc_list;
+					/*!< list node linking doc items */
+};
+
+/** This defines the list type through which the scan thread feeds the
+parallel tokenization threads and sort threads. */
+typedef UT_LIST_BASE_NODE_T(fts_doc_item_t)     fts_doc_list_t;
+
+#define FTS_NUM_AUX_INDEX	6	/* dimension of merge_buf[], rows_added[] etc. */
+#define FTS_PLL_MERGE		1
+
+/** Sort information passed to each individual parallel sort thread */
+struct fts_psort_t;
+
+/** Common info passed to each parallel sort thread */
+struct fts_psort_common_t {
+	row_merge_dup_t*	dup;		/*!< descriptor of FTS index */
+	dict_table_t*		new_table;	/*!< source table; NOTE(review):
+						confirm old vs. new table */
+	trx_t*			trx;		/*!< transaction */
+	fts_psort_t*		all_info;	/*!< all parallel sort info */
+	os_event_t		sort_event;	/*!< sort event */
+	os_event_t		merge_event;	/*!< merge event */
+	ibool			opt_doc_id_size;/*!< whether to use a 4-byte
+						instead of an 8-byte integer
+						for Doc ID during sort, when
+						Doc IDs are small enough */
+};
+
+struct fts_psort_t {
+	ulint			psort_id;	/*!< Parallel sort ID */
+	row_merge_buf_t*	merge_buf[FTS_NUM_AUX_INDEX];
+						/*!< sort buffer */
+	merge_file_t*		merge_file[FTS_NUM_AUX_INDEX];
+						/*!< sort file */
+	row_merge_block_t*	merge_block[FTS_NUM_AUX_INDEX];
+						/*!< buffer to write to file */
+	row_merge_block_t*	block_alloc[FTS_NUM_AUX_INDEX];
+						/*!< allocated buffer */
+	ulint			child_status;	/*!< child thread status */
+	ulint			state;		/*!< parent thread state */
+	fts_doc_list_t		fts_doc_list;	/*!< doc list to process */
+	fts_psort_common_t*	psort_common;	/*!< ptr to common psort info */
+	os_thread_t		thread_hdl;	/*!< thread handle */
+	dberr_t			error;		/*!< db error during psort */
+	ulint			memory_used;	/*!< memory used by fts_doc_list */
+	ib_mutex_t		mutex;		/*!< mutex for fts_doc_list */
+};
+
+/** Structure storing the state of a string tokenization operation */
+struct fts_tokenize_ctx {
+	ulint			processed_len;  /*!< processed string length */
+	ulint			init_pos;       /*!< doc start position */
+	ulint			buf_used;       /*!< the sort buffer (ID) when
+						tokenization stops, possibly
+						because the buffer is full */
+	ulint			rows_added[FTS_NUM_AUX_INDEX];
+						/*!< number of rows added for
+						each FTS index partition */
+	ib_rbt_t*		cached_stopword;/*!< in: stopword list */
+	dfield_t		sort_field[FTS_NUM_FIELDS_SORT];
+						/*!< in: sort field */
+};
+
+typedef struct fts_tokenize_ctx fts_tokenize_ctx_t;
+
+/** Structure storing the information needed for the insertion phase of
+FTS parallel sort. */
+struct fts_psort_insert {
+	trx_t*		trx;		/*!< Transaction used for insertion */
+	que_t**		ins_graph;	/*!< insert graph */
+	fts_table_t	fts_table;	/*!< auxiliary table */
+	CHARSET_INFO*	charset;	/*!< charset info */
+	mem_heap_t*	heap;		/*!< memory heap */
+	ibool		opt_doc_id_size;/*!< Whether to use a smaller (4-byte)
+					integer for Doc ID */
+};
+
+typedef struct fts_psort_insert	fts_psort_insert_t;
+
+
+/** Status bits used for communication between parent and child threads */
+#define FTS_PARENT_COMPLETE	1
+#define FTS_PARENT_EXITING	2
+#define FTS_CHILD_COMPLETE	1
+#define FTS_CHILD_EXITING	2
+
+/** Print some debug information; str is used as the fprintf() format string */
+#define	FTSORT_PRINT
+
+#ifdef	FTSORT_PRINT
+#define	DEBUG_FTS_SORT_PRINT(str)		\
+	do {					\
+		ut_print_timestamp(stderr);	\
+		fprintf(stderr, str);		\
+	} while (0)
+#else	/* !FTSORT_PRINT: the macro expands to nothing */
+#define DEBUG_FTS_SORT_PRINT(str)
+#endif	/* FTSORT_PRINT */
+
+/*************************************************************//**
+Create a temporary "fts sort index" used to merge sort the
+tokenized doc string. The index has three "fields":
+
+1) Tokenized word,
+2) Doc ID
+3) Word's position in original 'doc'.
+
+@return dict_index_t structure for the fts sort index */
+UNIV_INTERN
+dict_index_t*
+row_merge_create_fts_sort_index(
+/*============================*/
+	dict_index_t*		index,	/*!< in: Original FTS index
+					based on which this sort index
+					is created */
+	const dict_table_t*	table,	/*!< in: table that FTS index
+					is being created on */
+	ibool*			opt_doc_id_size);
+					/*!< out: whether to use 4 bytes
+					instead of 8 bytes integer to
+					store Doc ID during sort */
+
+/********************************************************************//**
+Initialize FTS parallel sort structures.
+@return TRUE if all successful */
+UNIV_INTERN
+ibool
+row_fts_psort_info_init(
+/*====================*/
+	trx_t*			trx,	/*!< in: transaction */
+	row_merge_dup_t*	dup,	/*!< in,own: descriptor of
+					FTS index being created */
+	const dict_table_t*	new_table,/*!< in: table where indexes are
+					created */
+	ibool			opt_doc_id_size,
+					/*!< in: whether to use 4 bytes
+					instead of 8 bytes integer to
+					store Doc ID during sort */
+	fts_psort_t**		psort,	/*!< out: parallel sort info to be
+					instantiated */
+	fts_psort_t**		merge)	/*!< out: parallel merge info
+					to be instantiated */
+	__attribute__((nonnull));
+/********************************************************************//**
+Clean up and deallocate FTS parallel sort structures, and close
+temporary merge sort files */
+UNIV_INTERN
+void
+row_fts_psort_info_destroy(
+/*=======================*/
+	fts_psort_t*	psort_info,	/*!< parallel sort info */
+	fts_psort_t*	merge_info);	/*!< parallel merge info */
+/********************************************************************//**
+Free up merge buffers when merge sort is done */
+UNIV_INTERN
+void
+row_fts_free_pll_merge_buf(
+/*=======================*/
+	fts_psort_t*	psort_info);	/*!< in: parallel sort info */
+
+/*********************************************************************//**
+Function performs parallel tokenization of the incoming doc strings.
+@return OS_THREAD_DUMMY_RETURN */
+UNIV_INTERN
+os_thread_ret_t
+fts_parallel_tokenization(
+/*======================*/
+	void*		arg);		/*!< in: psort_info for the thread */
+/*********************************************************************//**
+Start the parallel tokenization and parallel merge sort */
+UNIV_INTERN
+void
+row_fts_start_psort(
+/*================*/
+	fts_psort_t*	psort_info);	/*!< in: parallel sort info */
+/*********************************************************************//**
+Function performs the merge and insertion of the sorted records.
+@return OS_THREAD_DUMMY_RETURN */
+UNIV_INTERN
+os_thread_ret_t
+fts_parallel_merge(
+/*===============*/
+	void*		arg);		/*!< in: parallel merge info */
+/*********************************************************************//**
+Kick off the parallel merge and insert thread */
+UNIV_INTERN
+void
+row_fts_start_parallel_merge(
+/*=========================*/
+	fts_psort_t*	merge_info);	/*!< in: parallel merge info */
+/********************************************************************//**
+Read sorted FTS data files and insert data tuples to auxiliary tables.
+This function is declared void, so no status code is returned. */
+UNIV_INTERN
+void
+row_fts_insert_tuple(
+/*=================*/
+	fts_psort_insert_t*
+			ins_ctx,        /*!< in: insert context */
+	fts_tokenizer_word_t* word,	/*!< in: last processed
+					tokenized word */
+	ib_vector_t*	positions,	/*!< in: word position */
+	doc_id_t*	in_doc_id,	/*!< in: last item doc id */
+	dtuple_t*	dtuple);	/*!< in: entry to insert */
+/********************************************************************//**
+Propagate a newly added record up one level in the selection tree
+@return parent where this value was propagated to */
+UNIV_INTERN
+int
+row_merge_fts_sel_propagate(
+/*========================*/
+	int		propogated,	/*!< in: tree node propagated */
+	int*		sel_tree,	/*!< in: selection tree */
+	ulint		level,		/*!< in: selection tree level */
+	const mrec_t**	 mrec,		/*!< in: sort record */
+	ulint**		offsets,	/*!< in: record offsets */
+	dict_index_t*	index);		/*!< in: FTS index */
+/********************************************************************//**
+Read sorted file containing index data tuples and insert these data
+tuples to the index
+@return DB_SUCCESS or error number */
+UNIV_INTERN
+dberr_t
+row_fts_merge_insert(
+/*=================*/
+	dict_index_t*	index,		/*!< in: index */
+	dict_table_t*	table,		/*!< in: new table */
+	fts_psort_t*	psort_info,	/*!< in: parallel sort info */
+	ulint		id)		/*!< in: which auxiliary table's data
+					to insert to */
+	__attribute__((nonnull));
+#endif /* row0ftsort_h */
diff --git a/storage/innobase/include/row0import.h b/storage/innobase/include/row0import.h
new file mode 100644
index 00000000000..aa46fdb7c27
--- /dev/null
+++ b/storage/innobase/include/row0import.h
@@ -0,0 +1,91 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0import.h
+Header file for import tablespace functions.
+
+Created 2012-02-08 by Sunny Bains
+*******************************************************/
+
+#ifndef row0import_h
+#define row0import_h
+
+#include "univ.i"
+#include "db0err.h"
+#include "dict0types.h"
+
+// Forward declarations
+struct trx_t;
+struct dict_table_t;
+struct row_prebuilt_t;
+
+/*****************************************************************//**
+Imports a tablespace. The space id in the .ibd file must match the space id
+of the table in the data dictionary.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_import_for_mysql(
+/*=================*/
+	dict_table_t*	table,		/*!< in/out: table */
+	row_prebuilt_t*	prebuilt)	/*!< in: prebuilt struct
+						in MySQL */
+	__attribute__((nonnull, warn_unused_result));
+
+/*****************************************************************//**
+Update the DICT_TF2_DISCARDED flag in SYS_TABLES.
+@return DB_SUCCESS or error code. */
+UNIV_INTERN
+dberr_t
+row_import_update_discarded_flag(
+/*=============================*/
+	trx_t*		trx,			/*!< in/out: transaction that
+						covers the update */
+	table_id_t	table_id,		/*!< in: ID of the table whose
+						table->flags2 we want to set */
+	bool		discarded,		/*!< in: set MIX_LEN column bit
+						to discarded, if true */
+	bool		dict_locked)		/*!< in: Set to true if the
+						caller already owns the
+						dict_sys_t::mutex. */
+	__attribute__((nonnull, warn_unused_result));
+
+/*****************************************************************//**
+Update the (space, root page) of a table's indexes from the values
+in the data dictionary.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+row_import_update_index_root(
+/*=========================*/
+	trx_t*			trx,		/*!< in/out: transaction that
+						covers the update */
+	const dict_table_t*	table,		/*!< in: Table for which we want
+						to set the root page_no */
+	bool			reset,		/*!< in: if true then set to
+						FIL_NULL */
+	bool			dict_locked)	/*!< in: Set to true if the
+						caller already owns the
+						dict_sys_t::mutex. */
+	__attribute__((nonnull, warn_unused_result));
+#ifndef UNIV_NONINL
+#include "row0import.ic"
+#endif
+
+#endif /* row0import_h */
diff --git a/storage/innobase/include/row0import.ic b/storage/innobase/include/row0import.ic
new file mode 100644
index 00000000000..c5bbab49f6f
--- /dev/null
+++ b/storage/innobase/include/row0import.ic
@@ -0,0 +1,25 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0import.ic
+
+Import tablespace inline functions.
+
+Created 2012-02-08 Sunny Bains
+*******************************************************/
diff --git a/storage/innobase/include/row0ins.h b/storage/innobase/include/row0ins.h
new file mode 100644
index 00000000000..2a892d2f5df
--- /dev/null
+++ b/storage/innobase/include/row0ins.h
@@ -0,0 +1,240 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0ins.h
+Insert into a table
+
+Created 4/20/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0ins_h
+#define row0ins_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "que0types.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "row0types.h"
+
+/***************************************************************//**
+Checks if foreign key constraint fails for an index entry. Sets shared locks
+which lock either the success or the failure of the constraint. NOTE that
+the caller must have a shared latch on dict_foreign_key_check_lock.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_NO_REFERENCED_ROW, or
+DB_ROW_IS_REFERENCED */
+UNIV_INTERN
+dberr_t
+row_ins_check_foreign_constraint(
+/*=============================*/
+	ibool		check_ref,/*!< in: TRUE If we want to check that
+				the referenced table is ok, FALSE if we
+				want to check the foreign key table */
+	dict_foreign_t*	foreign,/*!< in: foreign constraint; NOTE that the
+				tables mentioned in it must be in the
+				dictionary cache if they exist at all */
+	dict_table_t*	table,	/*!< in: if check_ref is TRUE, then the foreign
+				table, else the referenced table */
+	dtuple_t*	entry,	/*!< in: index entry for index */
+	que_thr_t*	thr)	/*!< in: query thread */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Creates an insert node struct.
+@return	own: insert node struct */
+UNIV_INTERN
+ins_node_t*
+ins_node_create(
+/*============*/
+	ulint		ins_type,	/*!< in: INS_VALUES, ... */
+	dict_table_t*	table,		/*!< in: table where to insert */
+	mem_heap_t*	heap);		/*!< in: mem heap where created */
+/*********************************************************************//**
+Sets a new row to insert for an INS_DIRECT node. This function is only used
+if we have constructed the row separately, which is a rare case; this
+function is quite slow. */
+UNIV_INTERN
+void
+ins_node_set_new_row(
+/*=================*/
+	ins_node_t*	node,	/*!< in: insert node */
+	dtuple_t*	row);	/*!< in: new row (or first row) for the node */
+/***************************************************************//**
+Tries to insert an entry into a clustered index, ignoring foreign key
+constraints. If a record with the same unique key is found, it is either
+delete-marked by a committed transaction or a unique key violation occurs.
+The delete-marked record is then updated to an existing record, and an
+undo log record is written for it. NOTE(review): thr is documented as
+"or NULL", yet the unqualified nonnull attribute covers it; confirm.
+@retval DB_SUCCESS on success
+@retval DB_LOCK_WAIT on lock wait when !(flags & BTR_NO_LOCKING_FLAG)
+@retval DB_FAIL if retry with BTR_MODIFY_TREE is needed
+@return error code */
+UNIV_INTERN
+dberr_t
+row_ins_clust_index_entry_low(
+/*==========================*/
+	ulint		flags,	/*!< in: undo logging and locking flags */
+	ulint		mode,	/*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+				depending on whether we wish optimistic or
+				pessimistic descent down the index tree */
+	dict_index_t*	index,	/*!< in: clustered index */
+	ulint		n_uniq,	/*!< in: 0 or index->n_uniq */
+	dtuple_t*	entry,	/*!< in/out: index entry to insert */
+	ulint		n_ext,	/*!< in: number of externally stored columns */
+	que_thr_t*	thr)	/*!< in: query thread or NULL */
+	__attribute__((nonnull, warn_unused_result));
+/***************************************************************//**
+Tries to insert an entry into a secondary index. If a record with exactly the
+same fields is found, the other record is necessarily marked deleted.
+It is then unmarked. Otherwise, the entry is just inserted to the index.
+@retval DB_SUCCESS on success
+@retval DB_LOCK_WAIT on lock wait when !(flags & BTR_NO_LOCKING_FLAG)
+@retval DB_FAIL if retry with BTR_MODIFY_TREE is needed
+@return error code */
+UNIV_INTERN
+dberr_t
+row_ins_sec_index_entry_low(
+/*========================*/
+	ulint		flags,	/*!< in: undo logging and locking flags */
+	ulint		mode,	/*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+				depending on whether we wish optimistic or
+				pessimistic descent down the index tree */
+	dict_index_t*	index,	/*!< in: secondary index */
+	mem_heap_t*	offsets_heap,
+				/*!< in/out: memory heap that can be emptied */
+	mem_heap_t*	heap,	/*!< in/out: memory heap */
+	dtuple_t*	entry,	/*!< in/out: index entry to insert */
+	trx_id_t	trx_id,	/*!< in: PAGE_MAX_TRX_ID during
+				row_log_table_apply(), or 0 */
+	que_thr_t*	thr)	/*!< in: query thread */
+	__attribute__((nonnull, warn_unused_result));
+/***************************************************************//**
+Tries to insert the externally stored fields (off-page columns)
+of a clustered index entry.
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+UNIV_INTERN
+dberr_t
+row_ins_index_entry_big_rec_func(
+/*=============================*/
+	const dtuple_t*		entry,	/*!< in: index entry to insert */
+	const big_rec_t*	big_rec,/*!< in: externally stored fields */
+	ulint*			offsets,/*!< in/out: rec offsets */
+	mem_heap_t**		heap,	/*!< in/out: memory heap */
+	dict_index_t*		index,	/*!< in: index */
+	const char*		file,	/*!< in: file name of caller */
+#ifndef DBUG_OFF
+	const void*		thd,	/*!< in: connection, or NULL */
+#endif /* DBUG_OFF */
+	ulint			line)	/*!< in: line number of caller */
+	__attribute__((nonnull(1,2,3,4,5,6), warn_unused_result));
+#ifdef DBUG_OFF
+# define row_ins_index_entry_big_rec(e,big,ofs,heap,index,thd,file,line) \
+	row_ins_index_entry_big_rec_func(e,big,ofs,heap,index,file,line)
+#else /* DBUG_OFF */
+# define row_ins_index_entry_big_rec(e,big,ofs,heap,index,thd,file,line) \
+	row_ins_index_entry_big_rec_func(e,big,ofs,heap,index,file,thd,line)
+#endif /* DBUG_OFF */
+/***************************************************************//**
+Inserts an entry into a clustered index. Tries first optimistic,
+then pessimistic descent down the tree. If the entry matches enough
+to a delete marked record, performs the insert by updating or delete
+unmarking the delete marked record.
+@return	DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
+UNIV_INTERN
+dberr_t
+row_ins_clust_index_entry(
+/*======================*/
+	dict_index_t*	index,	/*!< in: clustered index */
+	dtuple_t*	entry,	/*!< in/out: index entry to insert */
+	que_thr_t*	thr,	/*!< in: query thread */
+	ulint		n_ext)	/*!< in: number of externally stored columns */
+	__attribute__((nonnull, warn_unused_result));
+/***************************************************************//**
+Inserts an entry into a secondary index. Tries first optimistic,
+then pessimistic descent down the tree. If the entry matches enough
+to a delete marked record, performs the insert by updating or delete
+unmarking the delete marked record.
+@return	DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
+UNIV_INTERN
+dberr_t
+row_ins_sec_index_entry(
+/*====================*/
+	dict_index_t*	index,	/*!< in: secondary index */
+	dtuple_t*	entry,	/*!< in/out: index entry to insert */
+	que_thr_t*	thr)	/*!< in: query thread */
+	__attribute__((nonnull, warn_unused_result));
+/***********************************************************//**
+Inserts a row to a table. This is a high-level function used in
+SQL execution graphs.
+@return	query thread to run next or NULL */
+UNIV_INTERN
+que_thr_t*
+row_ins_step(
+/*=========*/
+	que_thr_t*	thr);	/*!< in: query thread */
+
+/** Insert node structure */
+
+struct ins_node_t{
+	que_common_t	common;	/*!< node type: QUE_NODE_INSERT */
+	ulint		ins_type;/*!< INS_VALUES, INS_SEARCHED, or INS_DIRECT */
+	dtuple_t*	row;	/*!< row to insert */
+	dict_table_t*	table;	/*!< table where to insert */
+	sel_node_t*	select;	/*!< select in searched insert */
+	que_node_t*	values_list;/*!< list of expressions to evaluate and
+				insert in an INS_VALUES insert */
+	ulint		state;	/*!< node execution state */
+	dict_index_t*	index;	/*!< NULL, or the next index where the index
+				entry should be inserted */
+	dtuple_t*	entry;	/*!< NULL, or entry to insert in the index;
+				after a successful insert of the entry,
+				this should be reset to NULL */
+	UT_LIST_BASE_NODE_T(dtuple_t)
+			entry_list;/*!< list of entries, one for each index */
+	byte*		row_id_buf;/*!< buffer for the row id sys field in row */
+	trx_id_t	trx_id;	/*!< trx id of the last trx which executed the
+				node */
+	byte*		trx_id_buf;/*!< buffer for the trx id sys field in row */
+	mem_heap_t*	entry_sys_heap;
+				/*!< memory heap used as auxiliary storage;
+				entry_list and sys fields are stored here;
+				if this is NULL, entry list should be created
+				and buffers for sys fields in row allocated */
+	ulint		magic_n;	/*!< magic number; presumably INS_NODE_MAGIC_N */
+};
+
+#define	INS_NODE_MAGIC_N	15849075
+
+/* Insert node types */
+#define INS_SEARCHED	0	/* INSERT INTO ... SELECT ... */
+#define INS_VALUES	1	/* INSERT INTO ... VALUES ... */
+#define INS_DIRECT	2	/* this is for internal use in dict0crea:
+				insert the row directly */
+
+/* Node execution states */
+#define	INS_NODE_SET_IX_LOCK	1	/* we should set an IX lock on table */
+#define INS_NODE_ALLOC_ROW_ID	2	/* row id should be allocated */
+#define	INS_NODE_INSERT_ENTRIES 3	/* index entries should be built and
+					inserted */
+
+#ifndef UNIV_NONINL
+#include "row0ins.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/row0ins.ic b/storage/innobase/include/row0ins.ic
new file mode 100644
index 00000000000..9c191d869a2
--- /dev/null
+++ b/storage/innobase/include/row0ins.ic
@@ -0,0 +1,26 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0ins.ic
+Insert into a table
+
+Created 4/20/1996 Heikki Tuuri
+*******************************************************/
+
+
diff --git a/storage/innobase/include/row0log.h b/storage/innobase/include/row0log.h
new file mode 100644
index 00000000000..62715fe8808
--- /dev/null
+++ b/storage/innobase/include/row0log.h
@@ -0,0 +1,239 @@
+/*****************************************************************************
+
+Copyright (c) 2011, 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0log.h
+Modification log for online index creation and online table rebuild
+
+Created 2011-05-26 Marko Makela
+*******************************************************/
+
+#ifndef row0log_h
+#define row0log_h
+
+#include "univ.i"
+#include "mtr0types.h"
+#include "row0types.h"
+#include "rem0types.h"
+#include "data0types.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+
+/******************************************************//**
+Allocate the row log for an index and flag the index
+for online creation.
+@retval true if success, false if not */
+UNIV_INTERN
+bool
+row_log_allocate(
+/*=============*/
+	dict_index_t*	index,	/*!< in/out: index */
+	dict_table_t*	table,	/*!< in/out: new table being rebuilt,
+				or NULL when creating a secondary index */
+	bool		same_pk,/*!< in: whether the definition of the
+				PRIMARY KEY has remained the same */
+	const dtuple_t*	add_cols,
+				/*!< in: default values of
+				added columns, or NULL */
+	const ulint*	col_map)/*!< in: mapping of old column
+				numbers to new ones, or NULL if !table */
+	__attribute__((nonnull(1), warn_unused_result));
+
+/******************************************************//**
+Free the row log for an index that was being created online. */
+UNIV_INTERN
+void
+row_log_free(
+/*=========*/
+	row_log_t*&	log)	/*!< in,own: row log */
+	__attribute__((nonnull));
+
+/******************************************************//**
+Free the row log for an index on which online creation was aborted. */
+UNIV_INLINE
+void
+row_log_abort_sec(
+/*==============*/
+	dict_index_t*	index)	/*!< in/out: index (x-latched) */
+	__attribute__((nonnull));
+
+/******************************************************//**
+Try to log an operation to a secondary index that is
+(or was) being created.
+@retval	true if the operation was logged or can be ignored
+@retval	false if online index creation is not taking place */
+UNIV_INLINE
+bool
+row_log_online_op_try(
+/*==================*/
+	dict_index_t*	index,	/*!< in/out: index, S or X latched */
+	const dtuple_t* tuple,	/*!< in: index tuple */
+	trx_id_t	trx_id)	/*!< in: transaction ID for insert,
+				or 0 for delete */
+	__attribute__((nonnull, warn_unused_result));
+/******************************************************//**
+Logs an operation to a secondary index that is (or was) being created. */
+UNIV_INTERN
+void
+row_log_online_op(
+/*==============*/
+	dict_index_t*	index,	/*!< in/out: index, S or X latched */
+	const dtuple_t*	tuple,	/*!< in: index tuple */
+	trx_id_t	trx_id)	/*!< in: transaction ID for insert,
+				or 0 for delete */
+	UNIV_COLD __attribute__((nonnull));
+
+/******************************************************//**
+Gets the error status of the online index rebuild log.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+row_log_table_get_error(
+/*====================*/
+	const dict_index_t*	index)	/*!< in: clustered index of a table
+					that is being rebuilt online */
+	__attribute__((nonnull, warn_unused_result));
+
+/******************************************************//**
+Logs a delete operation to a table that is being rebuilt.
+This will be merged in row_log_table_apply_delete(). */
+UNIV_INTERN
+void
+row_log_table_delete(
+/*=================*/
+	const rec_t*	rec,	/*!< in: clustered index leaf page record,
+				page X-latched */
+	dict_index_t*	index,	/*!< in/out: clustered index, S-latched
+				or X-latched */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec,index) */
+	const byte*	sys)	/*!< in: DB_TRX_ID,DB_ROLL_PTR that should
+				be logged, or NULL to use those in rec */
+	UNIV_COLD __attribute__((nonnull(1,2,3)));
+
+/******************************************************//**
+Logs an update operation to a table that is being rebuilt.
+This will be merged in row_log_table_apply_update(). */
+UNIV_INTERN
+void
+row_log_table_update(
+/*=================*/
+	const rec_t*	rec,	/*!< in: clustered index leaf page record,
+				page X-latched */
+	dict_index_t*	index,	/*!< in/out: clustered index, S-latched
+				or X-latched */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec,index) */
+	const dtuple_t*	old_pk)	/*!< in: row_log_table_get_pk()
+				before the update */
+	UNIV_COLD __attribute__((nonnull(1,2,3)));
+
+/******************************************************//**
+Constructs the old PRIMARY KEY and DB_TRX_ID,DB_ROLL_PTR
+of a table that is being rebuilt.
+@return tuple of PRIMARY KEY,DB_TRX_ID,DB_ROLL_PTR in the rebuilt table,
+or NULL if the PRIMARY KEY definition does not change */
+UNIV_INTERN
+const dtuple_t*
+row_log_table_get_pk(
+/*=================*/
+	const rec_t*	rec,	/*!< in: clustered index leaf page record,
+				page X-latched */
+	dict_index_t*	index,	/*!< in/out: clustered index, S-latched
+				or X-latched */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec,index),
+				or NULL */
+	byte*		sys,	/*!< out: DB_TRX_ID,DB_ROLL_PTR for
+				row_log_table_delete(), or NULL */
+	mem_heap_t**	heap)	/*!< in/out: memory heap where allocated */
+	UNIV_COLD __attribute__((nonnull(1,2,5), warn_unused_result));
+
+/******************************************************//**
+Logs an insert to a table that is being rebuilt.
+This will be merged in row_log_table_apply_insert(). */
+UNIV_INTERN
+void
+row_log_table_insert(
+/*=================*/
+	const rec_t*	rec,	/*!< in: clustered index leaf page record,
+				page X-latched */
+	dict_index_t*	index,	/*!< in/out: clustered index, S-latched
+				or X-latched */
+	const ulint*	offsets)/*!< in: rec_get_offsets(rec,index) */
+	UNIV_COLD __attribute__((nonnull));
+/******************************************************//**
+Notes that a BLOB is being freed during online ALTER TABLE. */
+UNIV_INTERN
+void
+row_log_table_blob_free(
+/*====================*/
+	dict_index_t*	index,	/*!< in/out: clustered index, X-latched */
+	ulint		page_no)/*!< in: starting page number of the BLOB */
+	UNIV_COLD __attribute__((nonnull));
+/******************************************************//**
+Notes that a BLOB is being allocated during online ALTER TABLE. */
+UNIV_INTERN
+void
+row_log_table_blob_alloc(
+/*=====================*/
+	dict_index_t*	index,	/*!< in/out: clustered index, X-latched */
+	ulint		page_no)/*!< in: starting page number of the BLOB */
+	UNIV_COLD __attribute__((nonnull));
+/******************************************************//**
+Apply the row_log_table log to a table upon completing rebuild.
+@return DB_SUCCESS, or error code on failure */
+UNIV_INTERN
+dberr_t
+row_log_table_apply(
+/*================*/
+	que_thr_t*	thr,	/*!< in: query graph */
+	dict_table_t*	old_table,
+				/*!< in: old table */
+	struct TABLE*	table)	/*!< in/out: MySQL table
+				(for reporting duplicates) */
+	__attribute__((nonnull, warn_unused_result));
+
+/******************************************************//**
+Get the latest transaction ID that has invoked row_log_online_op()
+during online creation.
+@return latest transaction ID, or 0 if nothing was logged */
+UNIV_INTERN
+trx_id_t
+row_log_get_max_trx(
+/*================*/
+	dict_index_t*	index)	/*!< in: index, must be locked */
+	__attribute__((nonnull, warn_unused_result));
+
+/******************************************************//**
+Merge the row log to the index upon completing index creation.
+@return DB_SUCCESS, or error code on failure */
+UNIV_INTERN
+dberr_t
+row_log_apply(
+/*==========*/
+	trx_t*		trx,	/*!< in: transaction (for checking if
+				the operation was interrupted) */
+	dict_index_t*	index,	/*!< in/out: secondary index */
+	struct TABLE*	table)	/*!< in/out: MySQL table
+				(for reporting duplicates) */
+	__attribute__((nonnull, warn_unused_result));
+
+#ifndef UNIV_NONINL
+#include "row0log.ic"
+#endif
+
+#endif /* row0log.h */
diff --git a/storage/innobase/include/row0log.ic b/storage/innobase/include/row0log.ic
new file mode 100644
index 00000000000..b0f37dbd8e7
--- /dev/null
+++ b/storage/innobase/include/row0log.ic
@@ -0,0 +1,84 @@
+/*****************************************************************************
+
+Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0log.ic
+Modification log for online index creation and online table rebuild
+
+Created 2012-10-18 Marko Makela
+*******************************************************/
+
+#include "dict0dict.h"
+
+/******************************************************//**
+Mark a secondary index ONLINE_INDEX_ABORTED and free its online row log. */
+UNIV_INLINE
+void
+row_log_abort_sec(
+/*===============*/
+	dict_index_t*	index)	/*!< in/out: secondary index (x-latched) */
+{
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+	ut_ad(!dict_index_is_clust(index));
+	dict_index_set_online_status(index, ONLINE_INDEX_ABORTED);
+	row_log_free(index->online_log);
+}
+
+/******************************************************//**
+Try to log an operation to a secondary index that is
+(or was) being created online.
+@retval	true if the operation was logged, or skipped as creation was aborted
+@retval	false if the index is complete; the caller must modify the tree */
+UNIV_INLINE
+bool
+row_log_online_op_try(
+/*==================*/
+	dict_index_t*	index,	/*!< in/out: index, S or X latched */
+	const dtuple_t* tuple,	/*!< in: index tuple */
+	trx_id_t	trx_id)	/*!< in: transaction ID for insert,
+				or 0 for delete */
+{
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_SHARED)
+	      || rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+	switch (dict_index_get_online_status(index)) {
+	case ONLINE_INDEX_COMPLETE:
+		/* This is a normal index. Do not log anything.
+		The caller must perform the operation on the
+		index tree directly. */
+		return(false);
+	case ONLINE_INDEX_CREATION:
+		/* The index is being created online. Log the
+		operation. */
+		row_log_online_op(index, tuple, trx_id);
+		break;
+	case ONLINE_INDEX_ABORTED:
+	case ONLINE_INDEX_ABORTED_DROPPED:
+		/* The index was created online, but the operation was
+		aborted. Do not log the operation and tell the caller
+		to skip the operation. */
+		break;
+	}
+
+	return(true);
+}
diff --git a/storage/innobase/include/row0merge.h b/storage/innobase/include/row0merge.h
new file mode 100644
index 00000000000..2b9e9f7711c
--- /dev/null
+++ b/storage/innobase/include/row0merge.h
@@ -0,0 +1,430 @@
+/*****************************************************************************
+
+Copyright (c) 2005, 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0merge.h
+Index build routines using a merge sort
+
+Created 13/06/2005 Jan Lindstrom
+*******************************************************/
+
+#ifndef row0merge_h
+#define row0merge_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+#include "mtr0mtr.h"
+#include "rem0types.h"
+#include "rem0rec.h"
+#include "read0types.h"
+#include "btr0types.h"
+#include "row0mysql.h"
+#include "lock0types.h"
+#include "srv0srv.h"
+
+// Forward declaration
+struct ib_sequence_t;
+
+/** @brief Block size for I/O operations in merge sort.
+
+The minimum is UNIV_PAGE_SIZE, or page_get_free_space_of_empty()
+rounded to a power of 2.
+
+When not creating a PRIMARY KEY that contains column prefixes, this
+can be set as small as UNIV_PAGE_SIZE / 2. */
+typedef byte	row_merge_block_t;
+
+/** @brief Secondary buffer for I/O operations of merge records.
+
+This buffer is used for writing or reading a record that spans two
+row_merge_block_t.  Thus, it must be able to hold one merge record,
+whose maximum size is the same as the minimum size of
+row_merge_block_t. */
+typedef byte	mrec_buf_t[UNIV_PAGE_SIZE_MAX];
+
+/** @brief Merge record in row_merge_block_t.
+
+The format is the same as a record in ROW_FORMAT=COMPACT with the
+exception that the REC_N_NEW_EXTRA_BYTES are omitted. */
+typedef byte	mrec_t;
+
+/** Merge record, as stored in row_merge_buf_t::tuples */
+struct mtuple_t {
+	dfield_t*	fields;		/*!< data fields of the record */
+};
+
+/** Buffer for sorting the tuples of one index in main memory. */
+struct row_merge_buf_t {
+	mem_heap_t*	heap;		/*!< memory heap where allocated */
+	dict_index_t*	index;		/*!< the index the tuples belong to */
+	ulint		total_size;	/*!< total amount of data bytes */
+	ulint		n_tuples;	/*!< number of data tuples */
+	ulint		max_tuples;	/*!< maximum number of data tuples */
+	mtuple_t*	tuples;		/*!< array of data tuples */
+	mtuple_t*	tmp_tuples;	/*!< temporary copy of tuples,
+					used while sorting */
+};
+
+/** Information about one temporary file used in merge sort */
+struct merge_file_t {
+	int		fd;		/*!< file descriptor */
+	ulint		offset;		/*!< file offset (end of file) */
+	ib_uint64_t	n_rec;		/*!< number of records in the file */
+};
+
+/** Definition of one field of an index being created */
+struct index_field_t {
+	ulint		col_no;		/*!< column offset */
+	ulint		prefix_len;	/*!< column prefix length, or 0
+					if indexing the whole column */
+};
+
+/** Definition of an index being created; see row_merge_create_index() */
+struct index_def_t {
+	const char*	name;		/*!< index name */
+	ulint		ind_type;	/*!< 0, DICT_UNIQUE,
+					or DICT_CLUSTERED */
+	ulint		key_number;	/*!< MySQL key number,
+					or ULINT_UNDEFINED if none */
+	ulint		n_fields;	/*!< number of fields in index */
+	index_field_t*	fields;		/*!< field definitions */
+};
+
+/** Context for reporting duplicate records; see row_merge_dup_report() */
+struct row_merge_dup_t {
+	dict_index_t*		index;	/*!< index being sorted */
+	struct TABLE*		table;	/*!< MySQL table object */
+	const ulint*		col_map;/*!< mapping of column numbers
+					in table to the rebuilt table
+					(index->table), or NULL if not
+					rebuilding table */
+	ulint			n_dup;	/*!< number of duplicates */
+};
+
+/*************************************************************//**
+Report a duplicate key. */
+UNIV_INTERN
+void
+row_merge_dup_report(
+/*=================*/
+	row_merge_dup_t*	dup,	/*!< in/out: for reporting duplicates */
+	const dfield_t*		entry)	/*!< in: duplicate index entry */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Sets a table lock (LOCK_X or LOCK_S), for the duration of creating indexes.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_merge_lock_table(
+/*=================*/
+	trx_t*		trx,		/*!< in/out: transaction */
+	dict_table_t*	table,		/*!< in: table to lock */
+	enum lock_mode	mode)		/*!< in: LOCK_X or LOCK_S */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Drop indexes that were created before an error occurred.
+The data dictionary must have been locked exclusively by the caller,
+because the transaction will not be committed. */
+UNIV_INTERN
+void
+row_merge_drop_indexes_dict(
+/*========================*/
+	trx_t*		trx,	/*!< in/out: dictionary transaction */
+	table_id_t	table_id)/*!< in: table identifier */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Drop those indexes which were created before an error occurred.
+The data dictionary must have been locked exclusively by the caller,
+because the transaction will not be committed. */
+UNIV_INTERN
+void
+row_merge_drop_indexes(
+/*===================*/
+	trx_t*		trx,	/*!< in/out: transaction */
+	dict_table_t*	table,	/*!< in/out: table containing the indexes */
+	ibool		locked)	/*!< in: TRUE=table locked,
+				FALSE=may need to do a lazy drop */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Drop all partially created indexes during crash recovery. */
+UNIV_INTERN
+void
+row_merge_drop_temp_indexes(void);
+/*=============================*/
+
+/*********************************************************************//**
+Creates a temporary merge file and, if UNIV_PFS_IO is defined, registers
+the file descriptor with Performance Schema.
+@return File descriptor */
+UNIV_INTERN
+int
+row_merge_file_create_low(void)
+/*===========================*/
+	__attribute__((warn_unused_result));
+/*********************************************************************//**
+Destroy a merge file, and de-register the file from Performance Schema
+if UNIV_PFS_IO is defined. */
+UNIV_INTERN
+void
+row_merge_file_destroy_low(
+/*=======================*/
+	int		fd);	/*!< in: merge file descriptor */
+
+/*********************************************************************//**
+Provide a new pathname for a table that is being renamed if it belongs to
+a file-per-table tablespace.  The caller is responsible for freeing the
+memory allocated for the return value.
+@return	new pathname of tablespace file, or NULL if space = 0 */
+UNIV_INTERN
+char*
+row_make_new_pathname(
+/*==================*/
+	dict_table_t*	table,		/*!< in: table to be renamed */
+	const char*	new_name);	/*!< in: new name */
+/*********************************************************************//**
+Rename the tables in the data dictionary.  The data dictionary must
+have been locked exclusively by the caller, because the transaction
+will not be committed.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_merge_rename_tables_dict(
+/*=========================*/
+	dict_table_t*	old_table,	/*!< in/out: old table, renamed to
+					tmp_name */
+	dict_table_t*	new_table,	/*!< in/out: new table, renamed to
+					old_table->name */
+	const char*	tmp_name,	/*!< in: new name for old_table */
+	trx_t*		trx)		/*!< in/out: dictionary transaction */
+	__attribute__((nonnull, warn_unused_result));
+
+/*********************************************************************//**
+Rename an index in the dictionary that was created. The data
+dictionary must have been locked exclusively by the caller, because
+the transaction will not be committed.
+@return	DB_SUCCESS if all OK */
+UNIV_INTERN
+dberr_t
+row_merge_rename_index_to_add(
+/*==========================*/
+	trx_t*		trx,		/*!< in/out: transaction */
+	table_id_t	table_id,	/*!< in: table identifier */
+	index_id_t	index_id)	/*!< in: index identifier */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Rename an index in the dictionary that is to be dropped. The data
+dictionary must have been locked exclusively by the caller, because
+the transaction will not be committed.
+@return	DB_SUCCESS if all OK */
+UNIV_INTERN
+dberr_t
+row_merge_rename_index_to_drop(
+/*===========================*/
+	trx_t*		trx,		/*!< in/out: transaction */
+	table_id_t	table_id,	/*!< in: table identifier */
+	index_id_t	index_id)	/*!< in: index identifier */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Create the index and load it into the dictionary.
+@return	index, or NULL on error */
+UNIV_INTERN
+dict_index_t*
+row_merge_create_index(
+/*===================*/
+	trx_t*			trx,	/*!< in/out: trx (sets error_state) */
+	dict_table_t*		table,	/*!< in: the index is on this table */
+	const index_def_t*	index_def);
+					/*!< in: the index definition */
+/*********************************************************************//**
+Check if a transaction can use an index.
+@return	TRUE if index can be used by the transaction else FALSE */
+UNIV_INTERN
+ibool
+row_merge_is_index_usable(
+/*======================*/
+	const trx_t*		trx,	/*!< in: transaction */
+	const dict_index_t*	index);	/*!< in: index to check */
+/*********************************************************************//**
+Drop a table. The caller must have ensured that the background stats
+thread is not processing the table. This can be done by calling
+dict_stats_wait_bg_to_stop_using_table() after locking the dictionary and
+before calling this function.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+row_merge_drop_table(
+/*=================*/
+	trx_t*		trx,		/*!< in: transaction */
+	dict_table_t*	table)		/*!< in: table instance to drop */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Build indexes on a table by reading a clustered index,
+creating a temporary file containing index entries, merge sorting
+these index entries and inserting sorted index entries to indexes.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+row_merge_build_indexes(
+/*====================*/
+	trx_t*		trx,		/*!< in: transaction */
+	dict_table_t*	old_table,	/*!< in: table where rows are
+					read from */
+	dict_table_t*	new_table,	/*!< in: table where indexes are
+					created; identical to old_table
+					unless creating a PRIMARY KEY */
+	bool		online,		/*!< in: true if creating indexes
+					online */
+	dict_index_t**	indexes,	/*!< in: indexes to be created */
+	const ulint*	key_numbers,	/*!< in: MySQL key numbers */
+	ulint		n_indexes,	/*!< in: size of indexes[] */
+	struct TABLE*	table,		/*!< in/out: MySQL table, for
+					reporting erroneous key value
+					if applicable */
+	const dtuple_t*	add_cols,	/*!< in: default values of
+					added columns, or NULL */
+	const ulint*	col_map,	/*!< in: mapping of old column
+					numbers to new ones, or NULL
+					if old_table == new_table */
+	ulint		add_autoinc,	/*!< in: number of added
+					AUTO_INCREMENT column, or
+					ULINT_UNDEFINED if none is added */
+	ib_sequence_t&	sequence)	/*!< in/out: autoinc sequence */
+	__attribute__((nonnull(1,2,3,5,6,8), warn_unused_result));
+/********************************************************************//**
+Write a buffer to a block. */
+UNIV_INTERN
+void
+row_merge_buf_write(
+/*================*/
+	const row_merge_buf_t*	buf,	/*!< in: sorted buffer */
+	const merge_file_t*	of,	/*!< in: output file */
+	row_merge_block_t*	block)	/*!< out: buffer for writing to file */
+	__attribute__((nonnull));
+/********************************************************************//**
+Sort a buffer. */
+UNIV_INTERN
+void
+row_merge_buf_sort(
+/*===============*/
+	row_merge_buf_t*	buf,	/*!< in/out: sort buffer */
+	row_merge_dup_t*	dup)	/*!< in/out: reporter of duplicates
+					(NULL if non-unique index) */
+	__attribute__((nonnull(1)));
+/********************************************************************//**
+Write a merge block to the file system.
+@return TRUE if request was successful, FALSE if fail */
+UNIV_INTERN
+ibool
+row_merge_write(
+/*============*/
+	int		fd,	/*!< in: file descriptor */
+	ulint		offset,	/*!< in: offset where to write,
+				in number of row_merge_block_t elements */
+	const void*	buf);	/*!< in: data */
+/********************************************************************//**
+Empty a sort buffer.
+@return sort buffer */
+UNIV_INTERN
+row_merge_buf_t*
+row_merge_buf_empty(
+/*================*/
+	row_merge_buf_t*	buf)	/*!< in,own: sort buffer */
+	__attribute__((warn_unused_result, nonnull));
+/*********************************************************************//**
+Create a merge file.
+@return file descriptor, or -1 on failure */
+UNIV_INTERN
+int
+row_merge_file_create(
+/*==================*/
+	merge_file_t*	merge_file)	/*!< out: merge file structure */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Merge disk files.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+row_merge_sort(
+/*===========*/
+	trx_t*			trx,	/*!< in: transaction */
+	const row_merge_dup_t*	dup,	/*!< in: descriptor of
+					index being created */
+	merge_file_t*		file,	/*!< in/out: file containing
+					index entries */
+	row_merge_block_t*	block,	/*!< in/out: 3 buffers */
+	int*			tmpfd)	/*!< in/out: temporary file handle */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Allocate a sort buffer.
+@return own: sort buffer */
+UNIV_INTERN
+row_merge_buf_t*
+row_merge_buf_create(
+/*=================*/
+	dict_index_t*	index)	/*!< in: secondary index */
+	__attribute__((warn_unused_result, nonnull, malloc));
+/*********************************************************************//**
+Deallocate a sort buffer. */
+UNIV_INTERN
+void
+row_merge_buf_free(
+/*===============*/
+	row_merge_buf_t*	buf)	/*!< in,own: sort buffer to be freed */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Destroy a merge file. */
+UNIV_INTERN
+void
+row_merge_file_destroy(
+/*===================*/
+	merge_file_t*	merge_file)	/*!< in/out: merge file structure */
+	__attribute__((nonnull));
+/********************************************************************//**
+Read a merge block from the file system.
+@return TRUE if request was successful, FALSE if fail */
+UNIV_INTERN
+ibool
+row_merge_read(
+/*===========*/
+	int			fd,	/*!< in: file descriptor */
+	ulint			offset,	/*!< in: offset where to read
+					in number of row_merge_block_t
+					elements */
+	row_merge_block_t*	buf);	/*!< out: data */
+/********************************************************************//**
+Read a merge record.
+@return pointer to next record, or NULL on I/O error or end of list */
+UNIV_INTERN
+const byte*
+row_merge_read_rec(
+/*===============*/
+	row_merge_block_t*	block,	/*!< in/out: file buffer */
+	mrec_buf_t*		buf,	/*!< in/out: secondary buffer */
+	const byte*		b,	/*!< in: pointer to record */
+	const dict_index_t*	index,	/*!< in: index of the record */
+	int			fd,	/*!< in: file descriptor */
+	ulint*			foffs,	/*!< in/out: file offset */
+	const mrec_t**		mrec,	/*!< out: pointer to merge record,
+					or NULL on end of list
+					(non-NULL on I/O error) */
+	ulint*			offsets)/*!< out: offsets of mrec */
+	__attribute__((nonnull, warn_unused_result));
+#endif /* row0merge.h */
diff --git a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h
new file mode 100644
index 00000000000..06c07002c2b
--- /dev/null
+++ b/storage/innobase/include/row0mysql.h
@@ -0,0 +1,915 @@
+/*****************************************************************************
+
+Copyright (c) 2000, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0mysql.h
+Interface between Innobase row operations and MySQL.
+Also contains create table and other data dictionary operations.
+
+Created 9/17/2000 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0mysql_h
+#define row0mysql_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "que0types.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "row0types.h"
+#include "btr0pcur.h"
+#include "trx0types.h"
+
+// Forward declaration
+struct SysIndexCallback;
+
+extern ibool row_rollback_on_timeout;
+
+struct row_prebuilt_t;
+
+/*******************************************************************//**
+Frees the blob heap in prebuilt when no longer needed. */
+UNIV_INTERN
+void
+row_mysql_prebuilt_free_blob_heap(
+/*==============================*/
+	row_prebuilt_t*	prebuilt);	/*!< in: prebuilt struct of a
+					ha_innobase:: table handle */
+/*******************************************************************//**
+Stores a >= 5.0.3 format true VARCHAR length to dest, in the MySQL row
+format.
+@return pointer to the data, we skip the 1 or 2 bytes at the start
+that are used to store the len */
+UNIV_INTERN
+byte*
+row_mysql_store_true_var_len(
+/*=========================*/
+	byte*	dest,	/*!< out: where to store */
+	ulint	len,	/*!< in: length, must fit in two bytes */
+	ulint	lenlen);/*!< in: storage length of len: either 1 or 2 bytes */
+/*******************************************************************//**
+Reads a >= 5.0.3 format true VARCHAR length, in the MySQL row format, and
+returns a pointer to the data.
+@return pointer to the data, we skip the 1 or 2 bytes at the start
+that are used to store the len */
+UNIV_INTERN
+const byte*
+row_mysql_read_true_varchar(
+/*========================*/
+	ulint*		len,	/*!< out: variable-length field length */
+	const byte*	field,	/*!< in: field in the MySQL format */
+	ulint		lenlen);/*!< in: storage length of len: either 1
+				or 2 bytes */
+/*******************************************************************//**
+Stores a reference to a BLOB in the MySQL format. */
+UNIV_INTERN
+void
+row_mysql_store_blob_ref(
+/*=====================*/
+	byte*		dest,	/*!< out: where to store */
+	ulint		col_len,/*!< in: dest buffer size: determines into
+				how many bytes the BLOB length is stored,
+				the space for the length may vary from 1
+				to 4 bytes */
+	const void*	data,	/*!< in: BLOB data; if the value to store
+				is SQL NULL this should be NULL pointer */
+	ulint		len);	/*!< in: BLOB length; if the value to store
+				is SQL NULL this should be 0; remember
+				also to set the NULL bit in the MySQL record
+				header! */
+/*******************************************************************//**
+Reads a reference to a BLOB in the MySQL format.
+@return	pointer to BLOB data */
+UNIV_INTERN
+const byte*
+row_mysql_read_blob_ref(
+/*====================*/
+	ulint*		len,		/*!< out: BLOB length */
+	const byte*	ref,		/*!< in: BLOB reference in the
+					MySQL format */
+	ulint		col_len);	/*!< in: BLOB reference length
+					(not BLOB length) */
+/**************************************************************//**
+Pad a column with spaces. */
+UNIV_INTERN
+void
+row_mysql_pad_col(
+/*==============*/
+	ulint	mbminlen,	/*!< in: minimum size of a character,
+				in bytes */
+	byte*	pad,		/*!< out: padded buffer */
+	ulint	len);		/*!< in: number of bytes to pad */
+
+/**************************************************************//**
+Stores a non-SQL-NULL field given in the MySQL format in the InnoDB format.
+The counterpart of this function is row_sel_field_store_in_mysql_format() in
+row0sel.cc.
+@return	up to which byte we used buf in the conversion */
+UNIV_INTERN
+byte*
+row_mysql_store_col_in_innobase_format(
+/*===================================*/
+	dfield_t*	dfield,		/*!< in/out: dfield where dtype
+					information must be already set when
+					this function is called! */
+	byte*		buf,		/*!< in/out: buffer for a converted
+					integer value; this must be at least
+					col_len long! NOTE that dfield
+					may also get a pointer to 'buf',
+					therefore do not discard this as long
+					as dfield is used! */
+	ibool		row_format_col,	/*!< in: TRUE if mysql_data is from
+					a MySQL row, FALSE if from a MySQL
+					key value;
+					in MySQL, a true VARCHAR storage
+					format differs in a row and in a
+					key value: in a key value the length
+					is always stored in 2 bytes! */
+	const byte*	mysql_data,	/*!< in: MySQL column value, not
+					SQL NULL; NOTE that dfield may also
+					get a pointer to mysql_data,
+					therefore do not discard this as long
+					as dfield is used! */
+	ulint		col_len,	/*!< in: MySQL column length; NOTE that
+					this is the storage length of the
+					column in the MySQL format row, not
+					necessarily the length of the actual
+					payload data; if the column is a true
+					VARCHAR then this is irrelevant */
+	ulint		comp);		/*!< in: nonzero=compact format */
+/****************************************************************//**
+Handles user errors and lock waits detected by the database engine.
+@return true if it was a lock wait and we should continue running the
+query thread */
+UNIV_INTERN
+bool
+row_mysql_handle_errors(
+/*====================*/
+	dberr_t*	new_err,/*!< out: possible new error encountered in
+				rollback, or the old error that was
+				in effect at function entry */
+	trx_t*		trx,	/*!< in: transaction */
+	que_thr_t*	thr,	/*!< in: query thread, or NULL */
+	trx_savept_t*	savept)	/*!< in: savepoint, or NULL */
+	__attribute__((nonnull(1,2)));
+/********************************************************************//**
+Create a prebuilt struct for a MySQL table handle.
+@return	own: a prebuilt struct */
+UNIV_INTERN
+row_prebuilt_t*
+row_create_prebuilt(
+/*================*/
+	dict_table_t*	table,		/*!< in: Innobase table handle */
+	ulint		mysql_row_len);	/*!< in: length in bytes of a row in
+					the MySQL format */
+/********************************************************************//**
+Free a prebuilt struct for a MySQL table handle. */
+UNIV_INTERN
+void
+row_prebuilt_free(
+/*==============*/
+	row_prebuilt_t*	prebuilt,	/*!< in, own: prebuilt struct */
+	ibool		dict_locked);	/*!< in: TRUE=data dictionary locked */
+/*********************************************************************//**
+Updates the transaction pointers in query graphs stored in the prebuilt
+struct. */
+UNIV_INTERN
+void
+row_update_prebuilt_trx(
+/*====================*/
+	row_prebuilt_t*	prebuilt,	/*!< in/out: prebuilt struct
+					in MySQL handle */
+	trx_t*		trx);		/*!< in: transaction handle */
+/*********************************************************************//**
+Sets an AUTO_INC type lock on the table mentioned in prebuilt. The
+AUTO_INC lock gives exclusive access to the auto-inc counter of the
+table. The lock is reserved only for the duration of an SQL statement.
+It is not compatible with another AUTO_INC or exclusive lock on the
+table.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_lock_table_autoinc_for_mysql(
+/*=============================*/
+	row_prebuilt_t*	prebuilt)	/*!< in: prebuilt struct in the MySQL
+					table handle */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Sets a table lock on the table mentioned in prebuilt.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_lock_table_for_mysql(
+/*=====================*/
+	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct in the MySQL
+					table handle */
+	dict_table_t*	table,		/*!< in: table to lock, or NULL
+					if prebuilt->table should be
+					locked as
+					prebuilt->select_lock_type */
+	ulint		mode)		/*!< in: lock mode of table
+					(ignored if table==NULL) */
+	__attribute__((nonnull(1)));
+/*********************************************************************//**
+Does an insert for MySQL.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_insert_for_mysql(
+/*=================*/
+	byte*		mysql_rec,	/*!< in: row in the MySQL format */
+	row_prebuilt_t*	prebuilt)	/*!< in: prebuilt struct in MySQL
+					handle */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Builds a dummy query graph used in selects. */
+UNIV_INTERN
+void
+row_prebuild_sel_graph(
+/*===================*/
+	row_prebuilt_t*	prebuilt);	/*!< in: prebuilt struct in MySQL
+					handle */
+/*********************************************************************//**
+Gets pointer to a prebuilt update vector used in updates. If the update
+graph has not yet been built in the prebuilt struct, then this function
+first builds it.
+@return	prebuilt update vector */
+UNIV_INTERN
+upd_t*
+row_get_prebuilt_update_vector(
+/*===========================*/
+	row_prebuilt_t*	prebuilt);	/*!< in: prebuilt struct in MySQL
+					handle */
+/*********************************************************************//**
+Checks if a table is such that we automatically created a clustered
+index on it (on row id).
+@return	TRUE if the clustered index was generated automatically */
+UNIV_INTERN
+ibool
+row_table_got_default_clust_index(
+/*==============================*/
+	const dict_table_t*	table);	/*!< in: table */
+/*********************************************************************//**
+Does an update or delete of a row for MySQL.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_update_for_mysql(
+/*=================*/
+	byte*		mysql_rec,	/*!< in: the row to be updated, in
+					the MySQL format */
+	row_prebuilt_t*	prebuilt)	/*!< in: prebuilt struct in MySQL
+					handle */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+This can only be used when srv_locks_unsafe_for_binlog is TRUE or this
+session is using a READ COMMITTED or READ UNCOMMITTED isolation level.
+Before calling this function row_search_for_mysql() must have
+initialized prebuilt->new_rec_locks to store the information which new
+record locks really were set. This function removes a newly set
+clustered index record lock under prebuilt->pcur or
+prebuilt->clust_pcur.  Thus, this implements a 'mini-rollback' that
+releases the latest clustered index record lock we set. */
+UNIV_INTERN
+void
+row_unlock_for_mysql(
+/*=================*/
+	row_prebuilt_t*	prebuilt,	/*!< in/out: prebuilt struct in MySQL
+					handle */
+	ibool		has_latches_on_recs)/*!< in: TRUE if called
+					so that we have the latches on
+					the records under pcur and
+					clust_pcur, and we do not need
+					to reposition the cursors. */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Checks if a table name contains the string "/#sql" which denotes temporary
+tables in MySQL.
+@return true if temporary table */
+UNIV_INTERN
+bool
+row_is_mysql_tmp_table_name(
+/*========================*/
+	const char*	name) __attribute__((warn_unused_result));
+				/*!< in: table name in the form
+				'database/tablename' */
+
+/*********************************************************************//**
+Creates a query graph node of 'update' type to be used in the MySQL
+interface.
+@return	own: update node */
+UNIV_INTERN
+upd_node_t*
+row_create_update_node_for_mysql(
+/*=============================*/
+	dict_table_t*	table,	/*!< in: table to update */
+	mem_heap_t*	heap);	/*!< in: mem heap from which allocated */
+/**********************************************************************//**
+Does a cascaded delete or set null in a foreign key operation.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_update_cascade_for_mysql(
+/*=========================*/
+	que_thr_t*	thr,	/*!< in: query thread */
+	upd_node_t*	node,	/*!< in: update node used in the cascade
+				or set null operation */
+	dict_table_t*	table)	/*!< in: table where we do the operation */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Locks the data dictionary exclusively for performing a table create or other
+data dictionary modification operation. */
+UNIV_INTERN
+void
+row_mysql_lock_data_dictionary_func(
+/*================================*/
+	trx_t*		trx,	/*!< in/out: transaction */
+	const char*	file,	/*!< in: file name */
+	ulint		line);	/*!< in: line number */
+#define row_mysql_lock_data_dictionary(trx)				\
+	row_mysql_lock_data_dictionary_func(trx, __FILE__, __LINE__)
+/*********************************************************************//**
+Unlocks the data dictionary exclusive lock. */
+UNIV_INTERN
+void
+row_mysql_unlock_data_dictionary(
+/*=============================*/
+	trx_t*	trx);	/*!< in/out: transaction */
+/*********************************************************************//**
+Locks the data dictionary in shared mode from modifications, for performing
+foreign key check, rollback, or other operation invisible to MySQL. */
+UNIV_INTERN
+void
+row_mysql_freeze_data_dictionary_func(
+/*==================================*/
+	trx_t*		trx,	/*!< in/out: transaction */
+	const char*	file,	/*!< in: file name */
+	ulint		line);	/*!< in: line number */
+#define row_mysql_freeze_data_dictionary(trx)				\
+	row_mysql_freeze_data_dictionary_func(trx, __FILE__, __LINE__)
+/*********************************************************************//**
+Unlocks the data dictionary shared lock. */
+UNIV_INTERN
+void
+row_mysql_unfreeze_data_dictionary(
+/*===============================*/
+	trx_t*	trx);	/*!< in/out: transaction */
+/*********************************************************************//**
+Creates a table for MySQL. If the name of the table ends in
+one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
+"innodb_table_monitor", then this will also start the printing of monitor
+output by the master thread. If the table name ends in "innodb_mem_validate",
+InnoDB will try to invoke mem_validate(). On failure the transaction will
+be rolled back.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_create_table_for_mysql(
+/*=======================*/
+	dict_table_t*	table,	/*!< in, own: table definition
+				(will be freed, or on DB_SUCCESS
+				added to the data dictionary cache) */
+	trx_t*		trx,	/*!< in/out: transaction */
+	bool		commit)	/*!< in: if true, commit the transaction */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Does an index creation operation for MySQL. TODO: currently failure
+to create an index results in dropping the whole table! This is no problem
+currently as all indexes must be created at the same time as the table.
+@return	error number or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_create_index_for_mysql(
+/*=======================*/
+	dict_index_t*	index,		/*!< in, own: index definition
+					(will be freed) */
+	trx_t*		trx,		/*!< in: transaction handle */
+	const ulint*	field_lengths)	/*!< in: if not NULL, must contain
+					dict_index_get_n_fields(index)
+					actual field lengths for the
+					index columns, which are
+					then checked for not being too
+					large. */
+	__attribute__((nonnull(1,2), warn_unused_result));
+/*********************************************************************//**
+Scans a table create SQL string and adds to the data dictionary
+the foreign key constraints declared in the string. This function
+should be called after the indexes for a table have been created.
+Each foreign key constraint must be accompanied by indexes in
+both participating tables. The indexes are allowed to contain more
+fields than mentioned in the constraint.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_table_add_foreign_constraints(
+/*==============================*/
+	trx_t*		trx,		/*!< in: transaction */
+	const char*	sql_string,	/*!< in: table create statement where
+					foreign keys are declared like:
+				FOREIGN KEY (a, b) REFERENCES table2(c, d),
+					table2 can be written also with the
+					database name before it: test.table2 */
+	size_t		sql_length,	/*!< in: length of sql_string */
+	const char*	name,		/*!< in: table full name in the
+					normalized form
+					database_name/table_name */
+	ibool		reject_fks)	/*!< in: if TRUE, fail with error
+					code DB_CANNOT_ADD_CONSTRAINT if
+					any foreign keys are found. */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+The master thread in srv0srv.cc calls this regularly to drop tables which
+we must drop in background after queries to them have ended. Such lazy
+dropping of tables is needed in ALTER TABLE on Unix.
+@return	how many tables dropped + remaining tables in list */
+UNIV_INTERN
+ulint
+row_drop_tables_for_mysql_in_background(void);
+/*=========================================*/
+/*********************************************************************//**
+Get the background drop list length. NOTE: the caller must own the kernel
+mutex!
+@return	how many tables in list */
+UNIV_INTERN
+ulint
+row_get_background_drop_list_len_low(void);
+/*======================================*/
+/*********************************************************************//**
+Sets a table lock of the requested mode (LOCK_X or LOCK_S) on a table.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_mysql_lock_table(
+/*=================*/
+	trx_t*		trx,		/*!< in/out: transaction */
+	dict_table_t*	table,		/*!< in: table to lock */
+	enum lock_mode	mode,		/*!< in: LOCK_X or LOCK_S */
+	const char*	op_info)	/*!< in: string for trx->op_info */
+	__attribute__((nonnull, warn_unused_result));
+
+/*********************************************************************//**
+Truncates a table for MySQL.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_truncate_table_for_mysql(
+/*=========================*/
+	dict_table_t*	table,	/*!< in: table handle */
+	trx_t*		trx)	/*!< in: transaction handle */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Drops a table for MySQL.  If the name of the dropped table ends in
+one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
+"innodb_table_monitor", then this will also stop the printing of monitor
+output by the master thread.  If the data dictionary was not already locked
+by the transaction, the transaction will be committed.  Otherwise, the
+data dictionary will remain locked.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_drop_table_for_mysql(
+/*=====================*/
+	const char*	name,	/*!< in: table name */
+	trx_t*		trx,	/*!< in: dictionary transaction handle */
+	bool		drop_db,/*!< in: true=dropping whole database */
+	bool		nonatomic = true)
+				/*!< in: whether it is permitted
+				to release and reacquire dict_operation_lock */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Drop all temporary tables during crash recovery. */
+UNIV_INTERN
+void
+row_mysql_drop_temp_tables(void);
+/*============================*/
+
+/*********************************************************************//**
+Discards the tablespace of a table which is stored in an .ibd file. Discarding
+means that this function deletes the .ibd file and assigns a new table id for
+the table. Also the flag table->ibd_file_missing is set to TRUE.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_discard_tablespace_for_mysql(
+/*=============================*/
+	const char*	name,	/*!< in: table name */
+	trx_t*		trx)	/*!< in: transaction handle */
+	__attribute__((nonnull, warn_unused_result));
+/*****************************************************************//**
+Imports a tablespace. The space id in the .ibd file must match the space id
+of the table in the data dictionary.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_import_tablespace_for_mysql(
+/*============================*/
+	dict_table_t*	table,		/*!< in/out: table */
+	row_prebuilt_t*	prebuilt)	/*!< in: prebuilt struct in MySQL */
+        __attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Drops a database for MySQL.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_drop_database_for_mysql(
+/*========================*/
+	const char*	name,	/*!< in: database name which ends in '/' */
+	trx_t*		trx)	/*!< in: transaction handle */
+	__attribute__((nonnull));
+/*********************************************************************//**
+Renames a table for MySQL.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_rename_table_for_mysql(
+/*=======================*/
+	const char*	old_name,	/*!< in: old table name */
+	const char*	new_name,	/*!< in: new table name */
+	trx_t*		trx,		/*!< in/out: transaction */
+	bool		commit)		/*!< in: whether to commit trx */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Checks that the index contains entries in ascending order and that the
+unique constraint is not broken, and calculates the number of index entries
+in the read view of the current transaction.
+@return true if ok */
+UNIV_INTERN
+bool
+row_check_index_for_mysql(
+/*======================*/
+	row_prebuilt_t*		prebuilt,	/*!< in: prebuilt struct
+						in MySQL handle */
+	const dict_index_t*	index,		/*!< in: index */
+	ulint*			n_rows)		/*!< out: number of entries
+						seen in the consistent read */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Determines if a table is a magic monitor table.
+@return	true if monitor table */
+UNIV_INTERN
+bool
+row_is_magic_monitor_table(
+/*=======================*/
+	const char*	table_name)	/*!< in: name of the table, in the
+					form database/table_name */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Initialize this module */
+UNIV_INTERN
+void
+row_mysql_init(void);
+/*================*/
+
+/*********************************************************************//**
+Close this module */
+UNIV_INTERN
+void
+row_mysql_close(void);
+/*=================*/
+
+/*********************************************************************//**
+Reassigns the table identifier of a table.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_mysql_table_id_reassign(
+/*========================*/
+	dict_table_t*	table,	/*!< in/out: table */
+	trx_t*		trx,	/*!< in/out: transaction */
+	table_id_t*	new_id) /*!< out: new table id */
+        __attribute__((nonnull, warn_unused_result));
+
+/** A struct describing a place for an individual column in the MySQL
+row format which is presented to the table handler in ha_innobase.
+This template struct is used to speed up row transformations between
+Innobase and MySQL. */
+
+struct mysql_row_templ_t {
+	ulint	col_no;			/*!< column number of the column */
+	ulint	rec_field_no;		/*!< field number of the column in an
+					Innobase record in the current index;
+					not defined if template_type is
+					ROW_MYSQL_WHOLE_ROW */
+	ulint	clust_rec_field_no;	/*!< field number of the column in an
+					Innobase record in the clustered index;
+					not defined if template_type is
+					ROW_MYSQL_WHOLE_ROW */
+	ulint	icp_rec_field_no;	/*!< field number of the column in an
+					Innobase record in the current index;
+					not defined unless
+					index condition pushdown is used */
+	ulint	mysql_col_offset;	/*!< offset of the column in the MySQL
+					row format */
+	ulint	mysql_col_len;		/*!< length of the column in the MySQL
+					row format */
+	ulint	mysql_null_byte_offset;	/*!< MySQL NULL bit byte offset in a
+					MySQL record */
+	ulint	mysql_null_bit_mask;	/*!< bit mask to get the NULL bit,
+					zero if column cannot be NULL */
+	ulint	type;			/*!< column type in Innobase mtype
+					numbers DATA_CHAR... */
+	ulint	mysql_type;		/*!< MySQL type code; this is always
+					< 256 */
+	ulint	mysql_length_bytes;	/*!< if mysql_type
+					== DATA_MYSQL_TRUE_VARCHAR, this tells
+					whether we should use 1 or 2 bytes to
+					store the MySQL true VARCHAR data
+					length at the start of row in the MySQL
+					format (NOTE that the MySQL key value
+					format always uses 2 bytes for the data
+					len) */
+	ulint	charset;		/*!< MySQL charset-collation code
+					of the column, or zero */
+	ulint	mbminlen;		/*!< minimum length of a char, in bytes,
+					or zero if not a char type */
+	ulint	mbmaxlen;		/*!< maximum length of a char, in bytes,
+					or zero if not a char type */
+	ulint	is_unsigned;		/*!< if a column type is an integer
+					type and this field is != 0, then
+					it is an unsigned integer type */
+};
+
+#define MYSQL_FETCH_CACHE_SIZE		8
+/* After fetching this many rows, we start caching them in fetch_cache */
+#define MYSQL_FETCH_CACHE_THRESHOLD	4
+
+#define ROW_PREBUILT_ALLOCATED	78540783
+#define ROW_PREBUILT_FREED	26423527
+
+/** A struct for (sometimes lazily) prebuilt structures in an Innobase table
+handle used within MySQL; these are used to save CPU time. */
+
+struct row_prebuilt_t {
+	ulint		magic_n;	/*!< this magic number is set to
+					ROW_PREBUILT_ALLOCATED when created,
+					or ROW_PREBUILT_FREED when the
+					struct has been freed */
+	dict_table_t*	table;		/*!< Innobase table handle */
+	dict_index_t*	index;		/*!< current index for a search, if
+					any */
+	trx_t*		trx;		/*!< current transaction handle */
+	unsigned	sql_stat_start:1;/*!< TRUE when we start processing of
+					an SQL statement: we may have to set
+					an intention lock on the table,
+					create a consistent read view etc. */
+	unsigned	mysql_has_locked:1;/*!< this is set TRUE when MySQL
+					calls external_lock on this handle
+					with a lock flag, and set FALSE when
+					called with the F_UNLOCK flag */
+	unsigned	clust_index_was_generated:1;
+					/*!< if the user did not define a
+					primary key in MySQL, then Innobase
+					automatically generated a clustered
+					index where the ordering column is
+					the row id: in this case this flag
+					is set to TRUE */
+	unsigned	index_usable:1;	/*!< caches the value of
+					row_merge_is_index_usable(trx,index) */
+	unsigned	read_just_key:1;/*!< set to 1 when MySQL calls
+					ha_innobase::extra with the
+					argument HA_EXTRA_KEYREAD; it is enough
+					to read just columns defined in
+					the index (i.e., no read of the
+					clustered index record necessary) */
+	unsigned	used_in_HANDLER:1;/*!< TRUE if we have been using this
+					handle in a MySQL HANDLER low level
+					index cursor command: then we must
+					store the pcur position even in a
+					unique search from a clustered index,
+					because HANDLER allows NEXT and PREV
+					in such a situation */
+	unsigned	template_type:2;/*!< ROW_MYSQL_WHOLE_ROW,
+					ROW_MYSQL_REC_FIELDS,
+					ROW_MYSQL_DUMMY_TEMPLATE, or
+					ROW_MYSQL_NO_TEMPLATE */
+	unsigned	n_template:10;	/*!< number of elements in the
+					template */
+	unsigned	null_bitmap_len:10;/*!< number of bytes in the SQL NULL
+					bitmap at the start of a row in the
+					MySQL format */
+	unsigned	need_to_access_clustered:1; /*!< if we are fetching
+					columns through a secondary index
+					and at least one column is not in
+					the secondary index, then this is
+					set to TRUE */
+	unsigned	templ_contains_blob:1;/*!< TRUE if the template contains
+					a column with DATA_BLOB ==
+					get_innobase_type_from_mysql_type();
+					not to be confused with InnoDB
+					externally stored columns
+					(VARCHAR can be off-page too) */
+	mysql_row_templ_t* mysql_template;/*!< template used to transform
+					rows fast between MySQL and Innobase
+					formats; memory for this template
+					is not allocated from 'heap' */
+	mem_heap_t*	heap;		/*!< memory heap from which
+					these auxiliary structures are
+					allocated when needed */
+	ins_node_t*	ins_node;	/*!< Innobase SQL insert node
+					used to perform inserts
+					to the table */
+	byte*		ins_upd_rec_buff;/*!< buffer for storing data converted
+					to the Innobase format from the MySQL
+					format */
+	const byte*	default_rec;	/*!< the default values of all columns
+					(a "default row") in MySQL format */
+	ulint		hint_need_to_fetch_extra_cols;
+					/*!< normally this is set to 0; if this
+					is set to ROW_RETRIEVE_PRIMARY_KEY,
+					then we should at least retrieve all
+					columns in the primary key; if this
+					is set to ROW_RETRIEVE_ALL_COLS, then
+					we must retrieve all columns in the
+					key (if read_just_key == 1), or all
+					columns in the table */
+	upd_node_t*	upd_node;	/*!< Innobase SQL update node used
+					to perform updates and deletes */
+	trx_id_t	trx_id;		/*!< The table->def_trx_id when
+					ins_graph was built */
+	que_fork_t*	ins_graph;	/*!< Innobase SQL query graph used
+					in inserts. Will be rebuilt on
+					trx_id or n_indexes mismatch. */
+	que_fork_t*	upd_graph;	/*!< Innobase SQL query graph used
+					in updates or deletes */
+	btr_pcur_t	pcur;		/*!< persistent cursor used in selects
+					and updates */
+	btr_pcur_t	clust_pcur;	/*!< persistent cursor used in
+					some selects and updates */
+	que_fork_t*	sel_graph;	/*!< dummy query graph used in
+					selects */
+	dtuple_t*	search_tuple;	/*!< prebuilt dtuple used in selects */
+	byte		row_id[DATA_ROW_ID_LEN];
+					/*!< if the clustered index was
+					generated, the row id of the
+					last row fetched is stored
+					here */
+	doc_id_t	fts_doc_id;	/*!< if the table has an FTS index on
+					it then we fetch the doc_id.
+					FTS-FIXME: Currently we fetch it always
+					but in the future we must only fetch
+					it when FTS columns are being
+					updated */
+	dtuple_t*	clust_ref;	/*!< prebuilt dtuple used in
+					sel/upd/del */
+	ulint		select_lock_type;/*!< LOCK_NONE, LOCK_S, or LOCK_X */
+	ulint		stored_select_lock_type;/*!< this field is used to
+					remember the original select_lock_type
+					that was decided in ha_innodb.cc,
+					::store_lock(), ::external_lock(),
+					etc. */
+	ulint		row_read_type;	/*!< ROW_READ_WITH_LOCKS if row locks
+					should be obtained for records
+					under an UPDATE or DELETE cursor.
+					If innodb_locks_unsafe_for_binlog
+					is TRUE, this can be set to
+					ROW_READ_TRY_SEMI_CONSISTENT, so that
+					if the row under an UPDATE or DELETE
+					cursor was locked by another
+					transaction, InnoDB will resort
+					to reading the last committed value
+					('semi-consistent read').  Then,
+					this field will be set to
+					ROW_READ_DID_SEMI_CONSISTENT to
+					indicate that.  If the row does not
+					match the WHERE condition, MySQL will
+					invoke handler::unlock_row() to
+					clear the flag back to
+					ROW_READ_TRY_SEMI_CONSISTENT and
+					to simply skip the row.  If
+					the row matches, the next call to
+					row_search_for_mysql() will lock
+					the row.
+					This eliminates lock waits in some
+					cases; note that this breaks
+					serializability. */
+	ulint		new_rec_locks;	/*!< normally 0; if
+					srv_locks_unsafe_for_binlog is
+					TRUE or session is using READ
+					COMMITTED or READ UNCOMMITTED
+					isolation level, set in
+					row_search_for_mysql() if we set a new
+					record lock on the secondary
+					or clustered index; this is
+					used in row_unlock_for_mysql()
+					when releasing the lock under
+					the cursor if we determine
+					after retrieving the row that
+					it does not need to be locked
+					('mini-rollback') */
+	ulint		mysql_prefix_len;/*!< byte offset of the end of
+					the last requested column */
+	ulint		mysql_row_len;	/*!< length in bytes of a row in the
+					MySQL format */
+	ulint		n_rows_fetched;	/*!< number of rows fetched after
+					positioning the current cursor */
+	ulint		fetch_direction;/*!< ROW_SEL_NEXT or ROW_SEL_PREV */
+	byte*		fetch_cache[MYSQL_FETCH_CACHE_SIZE];
+					/*!< a cache for fetched rows if we
+					fetch many rows from the same cursor:
+					it saves CPU time to fetch them in a
+					batch; we reserve mysql_row_len
+					bytes for each such row; these
+					pointers point 4 bytes past the
+					allocated mem buf start, because
+					there is a 4 byte magic number at the
+					start and at the end */
+	ibool		keep_other_fields_on_keyread; /*!< when using fetch
+					cache with HA_EXTRA_KEYREAD, don't
+					overwrite other fields in the MySQL
+					row buffer. */
+	ulint		fetch_cache_first;/*!< position of the first not yet
+					fetched row in fetch_cache */
+	ulint		n_fetch_cached;	/*!< number of not yet fetched rows
+					in fetch_cache */
+	mem_heap_t*	blob_heap;	/*!< in SELECTS BLOB fields are copied
+					to this heap */
+	mem_heap_t*	old_vers_heap;	/*!< memory heap where a previous
+					version is built in consistent read */
+	bool		in_fts_query;	/*!< Whether we are in a FTS query */
+	/*----------------------*/
+	ulonglong	autoinc_last_value;
+					/*!< last value of AUTO-INC interval */
+	ulonglong	autoinc_increment;/*!< The increment step of the auto
+					increment column. Value must be
+					greater than or equal to 1. Required to
+					calculate the next value */
+	ulonglong	autoinc_offset; /*!< The offset passed to
+					get_auto_increment() by MySQL. Required
+					to calculate the next value */
+	dberr_t		autoinc_error;	/*!< The actual error code encountered
+					while trying to init or read the
+					autoinc value from the table. We
+					store it here so that we can return
+					it to MySQL */
+	/*----------------------*/
+	void*		idx_cond;	/*!< In ICP, pointer to a ha_innobase,
+					passed to innobase_index_cond().
+					NULL if index condition pushdown is
+					not used. */
+	ulint		idx_cond_n_cols;/*!< Number of fields in idx_cond_cols.
+					0 if and only if idx_cond == NULL. */
+	/*----------------------*/
+	ulint		magic_n2;	/*!< this should be the same as
+					magic_n */
+	/*----------------------*/
+	unsigned	innodb_api:1;	/*!< whether this is an InnoDB API
+					query */
+	const rec_t*	innodb_api_rec;	/*!< InnoDB API search result */
+	byte*		srch_key_val1;  /*!< buffer used in converting
+					search key values from MySQL format
+					to InnoDB format.*/
+	byte*		srch_key_val2;  /*!< buffer used in converting
+					search key values from MySQL format
+					to InnoDB format.*/
+	uint		srch_key_val_len; /*!< Size of search key */
+
+};
+
+/** Callback for row_mysql_sys_index_iterate() */
+struct SysIndexCallback {
+	virtual ~SysIndexCallback() { }
+
+	/** Callback method
+	@param mtr - current mini transaction
+	@param pcur - persistent cursor. */
+	virtual void operator()(mtr_t* mtr, btr_pcur_t* pcur) throw() = 0;
+};
+
+#define ROW_PREBUILT_FETCH_MAGIC_N	465765687
+
+#define ROW_MYSQL_WHOLE_ROW	0
+#define ROW_MYSQL_REC_FIELDS	1
+#define ROW_MYSQL_NO_TEMPLATE	2
+#define ROW_MYSQL_DUMMY_TEMPLATE 3	/* dummy template used in
+					row_scan_and_check_index */
+
+/* Values for hint_need_to_fetch_extra_cols */
+#define ROW_RETRIEVE_PRIMARY_KEY	1
+#define ROW_RETRIEVE_ALL_COLS		2
+
+/* Values for row_read_type */
+#define ROW_READ_WITH_LOCKS		0
+#define ROW_READ_TRY_SEMI_CONSISTENT	1
+#define ROW_READ_DID_SEMI_CONSISTENT	2
+
+#ifndef UNIV_NONINL
+#include "row0mysql.ic"
+#endif
+
+#endif /* row0mysql.h */
diff --git a/storage/innobase/include/row0mysql.ic b/storage/innobase/include/row0mysql.ic
new file mode 100644
index 00000000000..2eb60898c46
--- /dev/null
+++ b/storage/innobase/include/row0mysql.ic
@@ -0,0 +1,24 @@
+/*****************************************************************************
+
+Copyright (c) 2001, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0mysql.ic
+MySQL interface for Innobase
+
+Created 1/23/2001 Heikki Tuuri
+*******************************************************/
diff --git a/storage/innobase/include/row0purge.h b/storage/innobase/include/row0purge.h
new file mode 100644
index 00000000000..93dcf9cf49b
--- /dev/null
+++ b/storage/innobase/include/row0purge.h
@@ -0,0 +1,128 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0purge.h
+Purge obsolete records
+
+Created 3/14/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0purge_h
+#define row0purge_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "btr0types.h"
+#include "btr0pcur.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+#include "row0types.h"
+#include "row0purge.h"
+#include "ut0vec.h"
+
+/********************************************************************//**
+Creates a purge node to a query graph.
+@return	own: purge node */
+UNIV_INTERN
+purge_node_t*
+row_purge_node_create(
+/*==================*/
+	que_thr_t*	parent,		/*!< in: parent node, i.e., a
+					thr node */
+	mem_heap_t*	heap)		/*!< in: memory heap where created */
+	__attribute__((nonnull, warn_unused_result));
+/***********************************************************//**
+Determines if it is possible to remove a secondary index entry.
+Removal is possible if the secondary index entry does not refer to any
+not delete marked version of a clustered index record where DB_TRX_ID
+is newer than the purge view.
+
+NOTE: This function should only be called by the purge thread, only
+while holding a latch on the leaf page of the secondary index entry
+(or keeping the buffer pool watch on the page).  It is possible that
+this function first returns true and then false, if a user transaction
+inserts a record that the secondary index entry would refer to.
+However, in that case, the user transaction would also re-insert the
+secondary index entry after purge has removed it and released the leaf
+page latch.
+@return	true if the secondary index record can be purged */
+UNIV_INTERN
+bool
+row_purge_poss_sec(
+/*===============*/
+	purge_node_t*	node,	/*!< in/out: row purge node */
+	dict_index_t*	index,	/*!< in: secondary index */
+	const dtuple_t*	entry)	/*!< in: secondary index entry */
+	__attribute__((nonnull, warn_unused_result));
+/***********************************************************//**
+Does the purge operation for a single undo log record. This is a high-level
+function used in an SQL execution graph.
+@return	query thread to run next or NULL */
+UNIV_INTERN
+que_thr_t*
+row_purge_step(
+/*===========*/
+	que_thr_t*	thr)	/*!< in: query thread */
+	__attribute__((nonnull, warn_unused_result));
+
+/** Purge node structure */
+
+struct purge_node_t{
+	que_common_t	common;	/*!< node type: QUE_NODE_PURGE */
+	/*----------------------*/
+	/* Local storage for this graph node */
+	roll_ptr_t	roll_ptr;/*!< roll pointer to undo log record */
+	ib_vector_t*    undo_recs;/*!< Undo recs to purge */
+
+	undo_no_t	undo_no;/*!< undo number of the record */
+
+	ulint		rec_type;/*!< undo log record type: TRX_UNDO_INSERT_REC,
+				... */
+	dict_table_t*	table;	/*!< table where purge is done */
+
+	ulint		cmpl_info;/*!< compiler analysis info of an update */
+
+	upd_t*		update;	/*!< update vector for a clustered index
+				record */
+	dtuple_t*	ref;	/*!< NULL, or row reference to the next row to
+				handle */
+	dtuple_t*	row;	/*!< NULL, or a copy (also fields copied to
+				heap) of the indexed fields of the row to
+				handle */
+	dict_index_t*	index;	/*!< NULL, or the next index whose record should
+				be handled */
+	mem_heap_t*	heap;	/*!< memory heap used as auxiliary storage for
+				row; this must be emptied after a successful
+				purge of a row */
+	ibool		found_clust;/*!< TRUE if the clustered index record
+				determined by ref was found in the clustered
+				index, and we were able to position pcur on
+				it */
+	btr_pcur_t	pcur;	/*!< persistent cursor used in searching the
+				clustered index record */
+	ibool		done;	/*!< Debug flag */
+
+};
+
+#ifndef UNIV_NONINL
+#include "row0purge.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/row0purge.ic b/storage/innobase/include/row0purge.ic
new file mode 100644
index 00000000000..700106d1048
--- /dev/null
+++ b/storage/innobase/include/row0purge.ic
@@ -0,0 +1,25 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+
+/**************************************************//**
+@file include/row0purge.ic
+Purge obsolete records
+
+Created 3/14/1997 Heikki Tuuri
+*******************************************************/
diff --git a/storage/innobase/include/row0quiesce.h b/storage/innobase/include/row0quiesce.h
new file mode 100644
index 00000000000..1d6d11291b8
--- /dev/null
+++ b/storage/innobase/include/row0quiesce.h
@@ -0,0 +1,74 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0quiesce.h
+
+Header file for tablespace quiesce functions.
+
+Created 2012-02-08 by Sunny Bains
+*******************************************************/
+
+#ifndef row0quiesce_h
+#define row0quiesce_h
+
+#include "univ.i"
+#include "dict0types.h"
+
+struct trx_t;
+
+/** The version number of the export meta-data text file. */
+#define IB_EXPORT_CFG_VERSION_V1	0x1UL
+
+/*********************************************************************//**
+Quiesce the tablespace that the table resides in. */
+UNIV_INTERN
+void
+row_quiesce_table_start(
+/*====================*/
+	dict_table_t*	table,		/*!< in: quiesce this table */
+	trx_t*		trx)		/*!< in/out: transaction/session */
+        __attribute__((nonnull));
+
+/*********************************************************************//**
+Set a table's quiesce state.
+@return DB_SUCCESS or error code. */
+UNIV_INTERN
+dberr_t
+row_quiesce_set_state(
+/*==================*/
+	dict_table_t*	table,		/*!< in: quiesce this table */
+	ib_quiesce_t	state,		/*!< in: quiesce state to set */
+	trx_t*		trx)		/*!< in/out: transaction */
+        __attribute__((nonnull, warn_unused_result));
+
+/*********************************************************************//**
+Cleanup after table quiesce. */
+UNIV_INTERN
+void
+row_quiesce_table_complete(
+/*=======================*/
+	dict_table_t*	table,		/*!< in: quiesce this table */
+	trx_t*		trx)		/*!< in/out: transaction/session */
+        __attribute__((nonnull));
+
+#ifndef UNIV_NONINL
+#include "row0quiesce.ic"
+#endif
+
+#endif /* row0quiesce_h */
diff --git a/storage/innobase/include/row0quiesce.ic b/storage/innobase/include/row0quiesce.ic
new file mode 100644
index 00000000000..f570a6aed05
--- /dev/null
+++ b/storage/innobase/include/row0quiesce.ic
@@ -0,0 +1,26 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0quiesce.ic
+
+Quiesce a tablespace.
+
+Created 2012-02-08 Sunny Bains
+*******************************************************/
+
diff --git a/storage/innobase/include/row0row.h b/storage/innobase/include/row0row.h
new file mode 100644
index 00000000000..a4e5e0dd2fa
--- /dev/null
+++ b/storage/innobase/include/row0row.h
@@ -0,0 +1,343 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0row.h
+General row routines
+
+Created 4/20/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0row_h
+#define row0row_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+#include "mtr0mtr.h"
+#include "rem0types.h"
+#include "read0types.h"
+#include "row0types.h"
+#include "btr0types.h"
+
+/*********************************************************************//**
+Gets the offset of the DB_TRX_ID field, in bytes relative to the origin of
+a clustered index record.
+@return	offset of DATA_TRX_ID */
+UNIV_INLINE
+ulint
+row_get_trx_id_offset(
+/*==================*/
+	const dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*		offsets)/*!< in: record offsets */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Reads the trx id field from a clustered index record.
+@return	value of the field */
+UNIV_INLINE
+trx_id_t
+row_get_rec_trx_id(
+/*===============*/
+	const rec_t*		rec,	/*!< in: record */
+	const dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*		offsets)/*!< in: rec_get_offsets(rec, index) */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Reads the roll pointer field from a clustered index record.
+@return	value of the field */
+UNIV_INLINE
+roll_ptr_t
+row_get_rec_roll_ptr(
+/*=================*/
+	const rec_t*		rec,	/*!< in: record */
+	const dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*		offsets)/*!< in: rec_get_offsets(rec, index) */
+	__attribute__((nonnull, warn_unused_result));
+/*****************************************************************//**
+When an insert or purge to a table is performed, this function builds
+the entry to be inserted into or purged from an index on the table.
+@return index entry which should be inserted or purged
+@retval NULL if the externally stored columns in the clustered index record
+are unavailable and ext != NULL, or row is missing some needed columns. */
+UNIV_INTERN
+dtuple_t*
+row_build_index_entry_low(
+/*======================*/
+	const dtuple_t*		row,	/*!< in: row which should be
+					inserted or purged */
+	const row_ext_t*	ext,	/*!< in: externally stored column
+					prefixes, or NULL */
+	dict_index_t*		index,	/*!< in: index on the table */
+	mem_heap_t*		heap)	/*!< in: memory heap from which
+					the memory for the index entry
+					is allocated */
+	__attribute__((warn_unused_result, nonnull(1,3,4)));
+/*****************************************************************//**
+When an insert or purge to a table is performed, this function builds
+the entry to be inserted into or purged from an index on the table.
+@return index entry which should be inserted or purged, or NULL if the
+externally stored columns in the clustered index record are
+unavailable and ext != NULL */
+UNIV_INLINE
+dtuple_t*
+row_build_index_entry(
+/*==================*/
+	const dtuple_t*		row,	/*!< in: row which should be
+					inserted or purged */
+	const row_ext_t*	ext,	/*!< in: externally stored column
+					prefixes, or NULL */
+	dict_index_t*		index,	/*!< in: index on the table */
+	mem_heap_t*		heap)	/*!< in: memory heap from which
+					the memory for the index entry
+					is allocated */
+	__attribute__((warn_unused_result, nonnull(1,3,4)));
+/*******************************************************************//**
+An inverse function to row_build_index_entry. Builds a row from a
+record in a clustered index.
+@return	own: row built; see the NOTE below! */
+UNIV_INTERN
+dtuple_t*
+row_build(
+/*======*/
+	ulint			type,	/*!< in: ROW_COPY_POINTERS or
+					ROW_COPY_DATA; the latter
+					copies also the data fields to
+					heap while the first only
+					places pointers to data fields
+					on the index page, and thus is
+					more efficient */
+	const dict_index_t*	index,	/*!< in: clustered index */
+	const rec_t*		rec,	/*!< in: record in the clustered
+					index; NOTE: in the case
+					ROW_COPY_POINTERS the data
+					fields in the row will point
+					directly into this record,
+					therefore, the buffer page of
+					this record must be at least
+					s-latched and the latch held
+					as long as the row dtuple is used! */
+	const ulint*		offsets,/*!< in: rec_get_offsets(rec,index)
+					or NULL, in which case this function
+					will invoke rec_get_offsets() */
+	const dict_table_t*	col_table,
+					/*!< in: table, to check which
+					externally stored columns
+					occur in the ordering columns
+					of an index, or NULL if
+					index->table should be
+					consulted instead; the user
+					columns in this table should be
+					the same columns as in index->table */
+	const dtuple_t*		add_cols,
+					/*!< in: default values of
+					added columns, or NULL */
+	const ulint*		col_map,/*!< in: mapping of old column
+					numbers to new ones, or NULL */
+	row_ext_t**		ext,	/*!< out, own: cache of
+					externally stored column
+					prefixes, or NULL */
+	mem_heap_t*		heap)	/*!< in: memory heap from which
+					the memory needed is allocated */
+	__attribute__((nonnull(2,3,9)));
+/*******************************************************************//**
+Converts an index record to a typed data tuple.
+@return index entry built; does not set info_bits, and the data fields
+in the entry will point directly to rec */
+UNIV_INTERN
+dtuple_t*
+row_rec_to_index_entry_low(
+/*=======================*/
+	const rec_t*		rec,	/*!< in: record in the index */
+	const dict_index_t*	index,	/*!< in: index */
+	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
+	ulint*			n_ext,	/*!< out: number of externally
+					stored columns */
+	mem_heap_t*		heap)	/*!< in: memory heap from which
+					the memory needed is allocated */
+	__attribute__((nonnull, warn_unused_result));
+/*******************************************************************//**
+Converts an index record to a typed data tuple. NOTE that externally
+stored (often big) fields are NOT copied to heap.
+@return	own: index entry built */
+UNIV_INTERN
+dtuple_t*
+row_rec_to_index_entry(
+/*===================*/
+	const rec_t*		rec,	/*!< in: record in the index */
+	const dict_index_t*	index,	/*!< in: index */
+	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
+	ulint*			n_ext,	/*!< out: number of externally
+					stored columns */
+	mem_heap_t*		heap)	/*!< in: memory heap from which
+					the memory needed is allocated */
+	__attribute__((nonnull, warn_unused_result));
+/*******************************************************************//**
+Builds from a secondary index record a row reference with which we can
+search the clustered index record.
+@return	own: row reference built; see the NOTE below! */
+UNIV_INTERN
+dtuple_t*
+row_build_row_ref(
+/*==============*/
+	ulint		type,	/*!< in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
+				the former copies also the data fields to
+				heap, whereas the latter only places pointers
+				to data fields on the index page */
+	dict_index_t*	index,	/*!< in: secondary index */
+	const rec_t*	rec,	/*!< in: record in the index;
+				NOTE: in the case ROW_COPY_POINTERS
+				the data fields in the row will point
+				directly into this record, therefore,
+				the buffer page of this record must be
+				at least s-latched and the latch held
+				as long as the row reference is used! */
+	mem_heap_t*	heap)	/*!< in: memory heap from which the memory
+				needed is allocated */
+	__attribute__((nonnull, warn_unused_result));
+/*******************************************************************//**
+Builds from a secondary index record a row reference with which we can
+search the clustered index record. */
+UNIV_INTERN
+void
+row_build_row_ref_in_tuple(
+/*=======================*/
+	dtuple_t*		ref,	/*!< in/out: row reference built;
+					see the NOTE below! */
+	const rec_t*		rec,	/*!< in: record in the index;
+					NOTE: the data fields in ref
+					will point directly into this
+					record, therefore, the buffer
+					page of this record must be at
+					least s-latched and the latch
+					held as long as the row
+					reference is used! */
+	const dict_index_t*	index,	/*!< in: secondary index */
+	ulint*			offsets,/*!< in: rec_get_offsets(rec, index)
+					or NULL */
+	trx_t*			trx)	/*!< in: transaction or NULL */
+	__attribute__((nonnull(1,2,3)));
+/*******************************************************************//**
+Builds from a secondary index record a row reference with which we can
+search the clustered index record. */
+UNIV_INLINE
+void
+row_build_row_ref_fast(
+/*===================*/
+	dtuple_t*	ref,	/*!< in/out: typed data tuple where the
+				reference is built */
+	const ulint*	map,	/*!< in: array of field numbers in rec
+				telling how ref should be built from
+				the fields of rec */
+	const rec_t*	rec,	/*!< in: record in the index; must be
+				preserved while ref is used, as we do
+				not copy field values to heap */
+	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
+/***************************************************************//**
+Searches the clustered index record for a row, if we have the row
+reference.
+@return	TRUE if found */
+UNIV_INTERN
+ibool
+row_search_on_row_ref(
+/*==================*/
+	btr_pcur_t*		pcur,	/*!< out: persistent cursor, which must
+					be closed by the caller */
+	ulint			mode,	/*!< in: BTR_MODIFY_LEAF, ... */
+	const dict_table_t*	table,	/*!< in: table */
+	const dtuple_t*		ref,	/*!< in: row reference */
+	mtr_t*			mtr)	/*!< in/out: mtr */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Fetches the clustered index record for a secondary index record. The latches
+on the secondary index record are preserved.
+@return	record or NULL, if no record found */
+UNIV_INTERN
+rec_t*
+row_get_clust_rec(
+/*==============*/
+	ulint		mode,	/*!< in: BTR_MODIFY_LEAF, ... */
+	const rec_t*	rec,	/*!< in: record in a secondary index */
+	dict_index_t*	index,	/*!< in: secondary index */
+	dict_index_t**	clust_index,/*!< out: clustered index */
+	mtr_t*		mtr)	/*!< in: mtr */
+	__attribute__((nonnull, warn_unused_result));
+
+/** Result of row_search_index_entry() */
+enum row_search_result {
+	ROW_FOUND = 0,		/*!< the record was found */
+	ROW_NOT_FOUND,		/*!< the record was not found */
+	ROW_BUFFERED,		/*!< one of BTR_INSERT, BTR_DELETE, or
+				BTR_DELETE_MARK was specified, the
+				secondary index leaf page was not in
+				the buffer pool, and the operation was
+				enqueued in the insert/delete buffer */
+	ROW_NOT_DELETED_REF	/*!< BTR_DELETE was specified, and
+				row_purge_poss_sec() failed */
+};
+
+/***************************************************************//**
+Searches an index record.
+@return	whether the record was found or buffered */
+UNIV_INTERN
+enum row_search_result
+row_search_index_entry(
+/*===================*/
+	dict_index_t*	index,	/*!< in: index */
+	const dtuple_t*	entry,	/*!< in: index entry */
+	ulint		mode,	/*!< in: BTR_MODIFY_LEAF, ... */
+	btr_pcur_t*	pcur,	/*!< in/out: persistent cursor, which must
+				be closed by the caller */
+	mtr_t*		mtr)	/*!< in: mtr */
+	__attribute__((nonnull, warn_unused_result));
+
+#define ROW_COPY_DATA		1
+#define ROW_COPY_POINTERS	2
+
+/* The allowed latching order of index records is the following:
+(1) a secondary index record ->
+(2) the clustered index record ->
+(3) rollback segment data for the clustered index record. */
+
+/*******************************************************************//**
+Formats the raw data in "data" (in InnoDB on-disk format) using
+"dict_field" and writes the result to "buf".
+Not more than "buf_size" bytes are written to "buf".
+The result is always NUL-terminated (provided buf_size is positive) and the
+number of bytes that were written to "buf" is returned (including the
+terminating NUL).
+@return	number of bytes that were written */
+UNIV_INTERN
+ulint
+row_raw_format(
+/*===========*/
+	const char*		data,		/*!< in: raw data */
+	ulint			data_len,	/*!< in: raw data length
+						in bytes */
+	const dict_field_t*	dict_field,	/*!< in: index field */
+	char*			buf,		/*!< out: output buffer */
+	ulint			buf_size)	/*!< in: output buffer size
+						in bytes */
+	__attribute__((nonnull, warn_unused_result));
+
+#ifndef UNIV_NONINL
+#include "row0row.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/row0row.ic b/storage/innobase/include/row0row.ic
new file mode 100644
index 00000000000..ac62422be1f
--- /dev/null
+++ b/storage/innobase/include/row0row.ic
@@ -0,0 +1,174 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0row.ic
+General row routines
+
+Created 4/20/1996 Heikki Tuuri
+*******************************************************/
+
+#include "dict0dict.h"
+#include "rem0rec.h"
+#include "trx0undo.h"
+
+/*********************************************************************//**
+Determines where the DB_TRX_ID field starts within a clustered index
+record, as a byte offset relative to the origin of the record.
+@return	offset of DATA_TRX_ID */
+UNIV_INLINE
+ulint
+row_get_trx_id_offset(
+/*==================*/
+	const dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*		offsets)/*!< in: record offsets */
+{
+	ulint	trx_id_pos;
+	ulint	trx_id_len;
+	ulint	trx_id_offs;
+
+	ut_ad(dict_index_is_clust(index));
+	ut_ad(rec_offs_validate(NULL, index, offsets));
+
+	/* Find the field position first, then look up its offset. */
+	trx_id_pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
+	trx_id_offs = rec_get_nth_field_offs(offsets, trx_id_pos,
+					      &trx_id_len);
+
+	ut_ad(trx_id_len == DATA_TRX_ID_LEN);
+	return(trx_id_offs);
+}
+
+/*********************************************************************//**
+Fetches the value of the trx id field of a clustered index record.
+@return	value of the field */
+UNIV_INLINE
+trx_id_t
+row_get_rec_trx_id(
+/*===============*/
+	const rec_t*		rec,	/*!< in: record */
+	const dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*		offsets)/*!< in: rec_get_offsets(rec, index) */
+{
+	ulint	trx_id_offs;
+
+	ut_ad(dict_index_is_clust(index));
+	ut_ad(rec_offs_validate(rec, index, offsets));
+
+	/* When the index holds no offset (zero), compute it. */
+	trx_id_offs = index->trx_id_offset;
+	if (trx_id_offs == 0) {
+		trx_id_offs = row_get_trx_id_offset(index, offsets);
+	}
+
+	return(trx_read_trx_id(rec + trx_id_offs));
+}
+
+/*********************************************************************//**
+Fetches the value of the roll pointer field of a clustered index record.
+@return	value of the field */
+UNIV_INLINE
+roll_ptr_t
+row_get_rec_roll_ptr(
+/*=================*/
+	const rec_t*		rec,	/*!< in: record */
+	const dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*		offsets)/*!< in: rec_get_offsets(rec, index) */
+{
+	ulint	trx_id_offs;
+
+	ut_ad(dict_index_is_clust(index));
+	ut_ad(rec_offs_validate(rec, index, offsets));
+
+	/* The roll pointer is read DATA_TRX_ID_LEN bytes past the trx id. */
+	trx_id_offs = index->trx_id_offset;
+	if (trx_id_offs == 0) {
+		trx_id_offs = row_get_trx_id_offset(index, offsets);
+	}
+
+	return(trx_read_roll_ptr(rec + trx_id_offs + DATA_TRX_ID_LEN));
+}
+
+/*****************************************************************//**
+Builds the entry that an insert or purge on a table must insert into or
+purge from the given index. Wraps row_build_index_entry_low(), asserting
+via ut_ad() that the row and any entry built are typed.
+@return index entry which should be inserted or purged, or NULL if the
+externally stored clustered index columns are unavailable and ext != NULL */
+UNIV_INLINE
+dtuple_t*
+row_build_index_entry(
+/*==================*/
+	const dtuple_t*		row,	/*!< in: row which should be
+					inserted or purged */
+	const row_ext_t*	ext,	/*!< in: externally stored column
+					prefixes, or NULL */
+	dict_index_t*		index,	/*!< in: index on the table */
+	mem_heap_t*		heap)	/*!< in: memory heap from which
+					the memory for the index entry
+					is allocated */
+{
+	dtuple_t*	built;
+
+	ut_ad(dtuple_check_typed(row));
+	built = row_build_index_entry_low(row, ext, index, heap);
+	ut_ad(built == NULL || dtuple_check_typed(built));
+	return(built);
+}
+
+/*******************************************************************//**
+Fills in a row reference from a secondary index record by means of a
+field map; the reference can be used to search the clustered index record. */
+UNIV_INLINE
+void
+row_build_row_ref_fast(
+/*===================*/
+	dtuple_t*	ref,	/*!< in/out: typed data tuple where the
+				reference is built */
+	const ulint*	map,	/*!< in: array of field numbers in rec
+				telling how ref should be built from
+				the fields of rec */
+	const rec_t*	rec,	/*!< in: record in the index; must be
+				preserved while ref is used, as we do
+				not copy field values to heap */
+	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+{
+	ulint		pos;
+	ulint		n_ref_fields;
+	ulint		rec_field_no;
+	ulint		data_len;
+	const byte*	data;
+	dfield_t*	ref_field;
+
+	ut_ad(rec_offs_validate(rec, NULL, offsets));
+	ut_ad(!rec_offs_any_extern(offsets));
+	n_ref_fields = dtuple_get_n_fields(ref);
+
+	for (pos = 0; pos < n_ref_fields; pos++) {
+		ref_field = dtuple_get_nth_field(ref, pos);
+		rec_field_no = map[pos];
+		/* Entries mapped to ULINT_UNDEFINED are left untouched. */
+		if (rec_field_no == ULINT_UNDEFINED) {
+			continue;
+		}
+
+		data = rec_get_nth_field(rec, offsets,
+					 rec_field_no, &data_len);
+		dfield_set_data(ref_field, data, data_len);
+	}
+}
diff --git a/storage/innobase/include/row0sel.h b/storage/innobase/include/row0sel.h
new file mode 100644
index 00000000000..c8be80f89d9
--- /dev/null
+++ b/storage/innobase/include/row0sel.h
@@ -0,0 +1,409 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0sel.h
+Select
+
+Created 12/19/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0sel_h
+#define row0sel_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "que0types.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "row0types.h"
+#include "que0types.h"
+#include "pars0sym.h"
+#include "btr0pcur.h"
+#include "read0read.h"
+#include "row0mysql.h"
+
+/*********************************************************************//**
+Creates a select node struct.
+@return	own: select node struct */
+UNIV_INTERN
+sel_node_t*
+sel_node_create(
+/*============*/
+	mem_heap_t*	heap);	/*!< in: memory heap where created */
+/*********************************************************************//**
+Frees the memory private to a select node when a query graph is freed,
+does not free the heap where the node was originally created. */
+UNIV_INTERN
+void
+sel_node_free_private(
+/*==================*/
+	sel_node_t*	node);	/*!< in: select node struct */
+/*********************************************************************//**
+Frees a prefetch buffer for a column, including the dynamically allocated
+memory for data stored there. */
+UNIV_INTERN
+void
+sel_col_prefetch_buf_free(
+/*======================*/
+	sel_buf_t*	prefetch_buf);	/*!< in, own: prefetch buffer */
+/*********************************************************************//**
+Gets the plan node for the nth table in a join.
+@return	plan node */
+UNIV_INLINE
+plan_t*
+sel_node_get_nth_plan(
+/*==================*/
+	sel_node_t*	node,	/*!< in: select node */
+	ulint		i);	/*!< in: get ith plan node */
+/**********************************************************************//**
+Performs a select step. This is a high-level function used in SQL execution
+graphs.
+@return	query thread to run next or NULL */
+UNIV_INTERN
+que_thr_t*
+row_sel_step(
+/*=========*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/**********************************************************************//**
+Performs an execution step of an open or close cursor statement node.
+@return	query thread to run next or NULL */
+UNIV_INLINE
+que_thr_t*
+open_step(
+/*======*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/**********************************************************************//**
+Performs a fetch for a cursor.
+@return	query thread to run next or NULL */
+UNIV_INTERN
+que_thr_t*
+fetch_step(
+/*=======*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/****************************************************************//**
+Sample callback function for fetch that prints each row.
+@return	always returns non-NULL */
+UNIV_INTERN
+void*
+row_fetch_print(
+/*============*/
+	void*	row,		/*!< in:  sel_node_t* */
+	void*	user_arg);	/*!< in:  not used */
+/***********************************************************//**
+Prints a row in a select result.
+@return	query thread to run next or NULL */
+UNIV_INTERN
+que_thr_t*
+row_printf_step(
+/*============*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/****************************************************************//**
+Converts a key value stored in MySQL format to an Innobase dtuple. The last
+field of the key value may be just a prefix of a fixed length field: hence
+the parameter key_len. But currently we do not allow search keys where the
+last field is only a prefix of the full key field len and print a warning if
+such appears. */
+UNIV_INTERN
+void
+row_sel_convert_mysql_key_to_innobase(
+/*==================================*/
+	dtuple_t*	tuple,		/*!< in/out: tuple where to build;
+					NOTE: we assume that the type info
+					in the tuple is already according
+					to index! */
+	byte*		buf,		/*!< in: buffer to use in field
+					conversions; NOTE that dtuple->data
+					may end up pointing inside buf so
+					do not discard that buffer while
+					the tuple is being used. See
+					row_mysql_store_col_in_innobase_format()
+					in the case of DATA_INT */
+	ulint		buf_len,	/*!< in: buffer length */
+	dict_index_t*	index,		/*!< in: index of the key value */
+	const byte*	key_ptr,	/*!< in: MySQL key value */
+	ulint		key_len,	/*!< in: MySQL key value length */
+	trx_t*		trx);		/*!< in: transaction */
+/********************************************************************//**
+Searches for rows in the database. This is used in the interface to
+MySQL. This function opens a cursor, and also implements fetch next
+and fetch prev. NOTE that if we do a search with a full key value
+from a unique index (ROW_SEL_EXACT), then we will not store the cursor
+position and fetch next or fetch prev must not be tried to the cursor!
+@return DB_SUCCESS, DB_RECORD_NOT_FOUND, DB_END_OF_INDEX, DB_DEADLOCK,
+DB_LOCK_TABLE_FULL, or DB_TOO_BIG_RECORD */
+UNIV_INTERN
+dberr_t
+row_search_for_mysql(
+/*=================*/
+	byte*		buf,		/*!< in/out: buffer for the fetched
+					row in the MySQL format */
+	ulint		mode,		/*!< in: search mode PAGE_CUR_L, ... */
+	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct for the
+					table handle; this contains the info
+					of search_tuple, index; if search
+					tuple contains 0 fields then we
+					position the cursor at the start or
+					the end of the index, depending on
+					'mode' */
+	ulint		match_mode,	/*!< in: 0 or ROW_SEL_EXACT or
+					ROW_SEL_EXACT_PREFIX */
+	ulint		direction)	/*!< in: 0 or ROW_SEL_NEXT or
+					ROW_SEL_PREV; NOTE: if this is != 0,
+					then prebuilt must have a pcur
+					with stored position! In opening of a
+					cursor 'direction' should be 0. */
+	__attribute__((nonnull, warn_unused_result));
+/*******************************************************************//**
+Checks if MySQL at the moment is allowed for this table to retrieve a
+consistent read result, or store it to the query cache.
+@return	TRUE if storing or retrieving from the query cache is permitted */
+UNIV_INTERN
+ibool
+row_search_check_if_query_cache_permitted(
+/*======================================*/
+	trx_t*		trx,		/*!< in: transaction object */
+	const char*	norm_name);	/*!< in: concatenation of database name,
+					'/' char, table name */
+/*******************************************************************//**
+Read the max AUTOINC value from an index.
+@return	DB_SUCCESS if all OK else error code */
+UNIV_INTERN
+dberr_t
+row_search_max_autoinc(
+/*===================*/
+	dict_index_t*	index,		/*!< in: index to search */
+	const char*	col_name,	/*!< in: autoinc column name */
+	ib_uint64_t*	value)		/*!< out: AUTOINC value read */
+	__attribute__((nonnull, warn_unused_result));
+
+/** A structure for caching column values for prefetched rows */
+struct sel_buf_t{
+	byte*		data;	/*!< data, or NULL; if not NULL, this field
+				has allocated memory which must be explicitly
+				freed; can be != NULL even when len is
+				UNIV_SQL_NULL */
+	ulint		len;	/*!< data length, or UNIV_SQL_NULL */
+	ulint		val_buf_size;
+				/*!< size of the memory buffer allocated for
+				data: this can be more than len; this is
+				defined only when data != NULL */
+};
+
+/** Query plan: the search plan and search state for one table of a select */
+struct plan_t{
+	dict_table_t*	table;		/*!< table struct in the dictionary
+					cache */
+	dict_index_t*	index;		/*!< index of table used in the search */
+	btr_pcur_t	pcur;		/*!< persistent cursor used to search
+					the index */
+	ibool		asc;		/*!< TRUE if the cursor travels upwards */
+	ibool		pcur_is_open;	/*!< TRUE if pcur has been positioned
+					and we can try to fetch new rows */
+	ibool		cursor_at_end;	/*!< TRUE if the cursor is open but
+					we know that there are no more
+					qualifying rows left to retrieve from
+					the index tree; NOTE, though, that
+					there may still be unprocessed rows in
+					the prefetch stack; always FALSE when
+					pcur_is_open is FALSE */
+	ibool		stored_cursor_rec_processed;
+					/*!< TRUE if the pcur position has been
+					stored and the record it is positioned
+					on has already been processed */
+	que_node_t**	tuple_exps;	/*!< array of expressions
+					which are used to calculate
+					the field values in the search
+					tuple: there is exactly one
+					expression for each field in
+					the search tuple */
+	dtuple_t*	tuple;		/*!< search tuple */
+	ulint		mode;		/*!< search mode: PAGE_CUR_G, ... */
+	ulint		n_exact_match;	/*!< number of first fields in
+					the search tuple which must be
+					matched exactly */
+	ibool		unique_search;	/*!< TRUE if we are searching for an
+					index record with a unique key */
+	ulint		n_rows_fetched;	/*!< number of rows fetched using pcur
+					after it was opened */
+	ulint		n_rows_prefetched;/*!< number of prefetched rows cached
+					for fetch: fetching several rows in
+					the same mtr saves CPU time */
+	ulint		first_prefetched;/*!< index of the first cached row in
+					select buffer arrays for each column */
+	ibool		no_prefetch;	/*!< TRUE: no prefetch for this table */
+	sym_node_list_t	columns;	/*!< symbol table nodes for the columns
+					to retrieve from the table */
+	UT_LIST_BASE_NODE_T(func_node_t)
+			end_conds;	/*!< conditions which determine the
+					fetch limit of the index segment we
+					have to look at: when one of these
+					fails, the result set has been
+					exhausted for the cursor in this
+					index; these conditions are normalized
+					so that in a comparison the column
+					of this table is the first argument */
+	UT_LIST_BASE_NODE_T(func_node_t)
+			other_conds;	/*!< the rest of the search conditions
+					that we can test at this table in a join */
+	ibool		must_get_clust;	/*!< TRUE if index is a non-clustered
+					index and we must also fetch the
+					clustered index record; this is the
+					case if the non-clustered record does
+					not contain all the needed columns, or
+					if this is a single-table explicit
+					cursor, or a searched update or
+					delete */
+	ulint*		clust_map;	/*!< map telling how clust_ref is built
+					from the fields of a non-clustered
+					record */
+	dtuple_t*	clust_ref;	/*!< the reference to the clustered
+					index entry is built here if index is
+					a non-clustered index */
+	btr_pcur_t	clust_pcur;	/*!< if index is non-clustered, we use
+					this pcur to search the clustered
+					index */
+	mem_heap_t*	old_vers_heap;	/*!< memory heap used in building an old
+					version of a row, or NULL */
+};
+
+/** States of a select node, i.e. the values of sel_node_t::state */
+enum sel_node_state {
+	SEL_NODE_CLOSED,	/*!< a declared cursor which is not
+				currently open */
+	SEL_NODE_OPEN,		/*!< intention locks not yet set on tables */
+	SEL_NODE_FETCH,		/*!< intention locks have been set */
+	SEL_NODE_NO_MORE_ROWS	/*!< cursor has reached the result set end */
+};
+
+/** Select statement node of a query graph */
+struct sel_node_t{
+	que_common_t	common;		/*!< node type: QUE_NODE_SELECT */
+	enum sel_node_state
+			state;	/*!< node state, see enum sel_node_state */
+	que_node_t*	select_list;	/*!< select list */
+	sym_node_t*	into_list;	/*!< variables list or NULL */
+	sym_node_t*	table_list;	/*!< table list */
+	ibool		asc;		/*!< TRUE if the rows should be fetched
+					in ascending order */
+	ibool		set_x_locks;	/*!< TRUE if the cursor is for update or
+					delete, which means that a row x-lock
+					should be placed on the cursor row */
+	ulint		row_lock_mode;	/*!< LOCK_X or LOCK_S */
+	ulint		n_tables;	/*!< number of tables */
+	ulint		fetch_table;	/*!< number of the next table to access
+					in the join */
+	plan_t*		plans;		/*!< array of n_tables plan nodes
+					containing the search plan and the
+					search data structures */
+	que_node_t*	search_cond;	/*!< search condition */
+	read_view_t*	read_view;	/*!< if the query is a non-locking
+					consistent read, its read view is
+					placed here, otherwise NULL */
+	ibool		consistent_read;/*!< TRUE if the select is a consistent,
+					non-locking read */
+	order_node_t*	order_by;	/*!< order by column definition, or
+					NULL */
+	ibool		is_aggregate;	/*!< TRUE if the select list consists of
+					aggregate functions */
+	ibool		aggregate_already_fetched;
+					/*!< TRUE if the aggregate row has
+					already been fetched for the current
+					cursor */
+	ibool		can_get_updated;/*!< this is TRUE if the select
+					is in a single-table explicit
+					cursor which can get updated
+					within the stored procedure,
+					or in a searched update or
+					delete; NOTE that to determine
+					whether an explicit cursor can
+					get updated, the parser checks
+					whether the stored procedure
+					contains positioned update or
+					delete statements */
+	sym_node_t*	explicit_cursor;/*!< not NULL if an explicit cursor */
+	UT_LIST_BASE_NODE_T(sym_node_t)
+			copy_variables; /*!< variables whose values we have to
+					copy when an explicit cursor is opened,
+					so that they do not change between
+					fetches */
+};
+
+/** Fetch statement node of a query graph */
+struct fetch_node_t{
+	que_common_t	common;		/*!< type: QUE_NODE_FETCH */
+	sel_node_t*	cursor_def;	/*!< cursor definition */
+	sym_node_t*	into_list;	/*!< variables to set */
+
+	pars_user_func_t*
+			func;		/*!< User callback function or NULL.
+					The first argument to the function
+					is a sel_node_t*, containing the
+					results of the SELECT operation for
+					one row. If the function returns
+					NULL, it is not interested in
+					further rows and the cursor is
+					modified so that (cursor % NOTFOUND)
+					is true. If it returns non-NULL,
+					fetching continues normally. See
+					row_fetch_print() for an example
+					(and a useful debugging tool). */
+};
+
+/** Operation type of an open_node_t: open or close a cursor */
+enum open_node_op {
+	ROW_SEL_OPEN_CURSOR,	/*!< open the cursor */
+	ROW_SEL_CLOSE_CURSOR	/*!< close the cursor */
+};
+
+/** Statement node which opens or closes a cursor */
+struct open_node_t{
+	que_common_t	common;		/*!< type: QUE_NODE_OPEN */
+	enum open_node_op
+			op_type;	/*!< operation type: open or
+					close the cursor */
+	sel_node_t*	cursor_def;	/*!< definition of the cursor */
+};
+
+/** Row printf statement node of a query graph */
+struct row_printf_node_t{
+	que_common_t	common;		/*!< type: QUE_NODE_ROW_PRINTF */
+	sel_node_t*	sel_node;	/*!< select node */
+};
+
+/** Search direction for the MySQL interface; the values start from 1 */
+enum row_sel_direction {
+	ROW_SEL_NEXT = 1,	/*!< ascending direction */
+	ROW_SEL_PREV = 2	/*!< descending direction */
+};
+
+/** Match mode for the MySQL interface; the values start from 1 */
+enum row_sel_match_mode {
+	ROW_SEL_EXACT = 1,	/*!< search using a complete key value */
+	ROW_SEL_EXACT_PREFIX	/*!< search using a key prefix which
+				must match rows: the prefix may
+				contain an incomplete field (the last
+				field in the prefix may be just a
+				prefix of a fixed length column) */
+};
+
+#ifndef UNIV_NONINL
+#include "row0sel.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/row0sel.ic b/storage/innobase/include/row0sel.ic
new file mode 100644
index 00000000000..d83a3448832
--- /dev/null
+++ b/storage/innobase/include/row0sel.ic
@@ -0,0 +1,105 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0sel.ic
+Select
+
+Created 12/19/1997 Heikki Tuuri
+*******************************************************/
+
+#include "que0que.h"
+
+/*********************************************************************//**
+Returns the plan of the i'th table of a join (i counts from zero).
+@return	pointer to element i of the node->plans array */
+UNIV_INLINE
+plan_t*
+sel_node_get_nth_plan(
+/*==================*/
+	sel_node_t*	node,	/*!< in: select node owning the plans */
+	ulint		i)	/*!< in: plan index, must be < n_tables */
+{
+	/* Assert that the index is within the number of tables. */
+	ut_ad(node->n_tables > i);
+	return(&node->plans[i]);
+}
+
+/*********************************************************************//**
+Rewinds the cursor of sel_node by putting it in the SEL_NODE_OPEN state:
+fetching then restarts from the beginning of the result set, wherever the
+cursor was before, and intention locks will be set on the tables again. */
+UNIV_INLINE
+void
+sel_node_reset_cursor(
+/*==================*/
+	sel_node_t*	node)	/*!< in/out: select node; state is reset */
+{
+	node->state = SEL_NODE_OPEN;
+}
+
+/**********************************************************************//**
+Performs an execution step of an open or close cursor statement node.
+For ROW_SEL_OPEN_CURSOR the cursor is reset to SEL_NODE_OPEN whatever its
+current state is. For a close, a cursor that is not SEL_NODE_CLOSED becomes
+closed; closing an already closed cursor prints "SQL error" to stderr and
+invokes ut_error. Finally thr->run_node is set to the parent of the node.
+@return	thr, the query thread to run next; the only return statement
+returns thr itself */
+UNIV_INLINE
+que_thr_t*
+open_step(
+/*======*/
+	que_thr_t*	thr)	/*!< in/out: query thread whose run_node is
+				the QUE_NODE_OPEN node to execute */
+{
+	sel_node_t*	sel_node;
+	open_node_t*	node;
+	dberr_t		err = DB_SUCCESS;
+
+	ut_ad(thr);
+
+	node = (open_node_t*) thr->run_node;
+	ut_ad(que_node_get_type(node) == QUE_NODE_OPEN);
+
+	sel_node = node->cursor_def;
+
+	if (node->op_type == ROW_SEL_OPEN_CURSOR) {
+		/* The current state is not checked here, so opening a
+		cursor that is not closed simply restarts it. */
+		sel_node_reset_cursor(sel_node);
+	} else if (sel_node->state != SEL_NODE_CLOSED) {
+		sel_node->state = SEL_NODE_CLOSED;
+	} else {
+		/* Closing a cursor which is already closed is an error. */
+		err = DB_ERROR;
+	}
+
+	if (err != DB_SUCCESS) {
+		/* SQL error detected: print the numeric error code and
+		raise ut_error */
+		fprintf(stderr, "SQL error %lu\n", (ulong) err);
+
+		ut_error;
+	}
+
+	/* Continue execution from the parent of this statement node. */
+	thr->run_node = que_node_get_parent(node);
+
+	return(thr);
+}
diff --git a/storage/innobase/include/row0types.h b/storage/innobase/include/row0types.h
new file mode 100644
index 00000000000..52c89cb01fa
--- /dev/null
+++ b/storage/innobase/include/row0types.h
@@ -0,0 +1,55 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0types.h
+Row operation global types
+
+Created 12/27/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0types_h
+#define row0types_h
+
+struct plan_t;
+
+struct upd_t;
+struct upd_field_t;
+struct upd_node_t;
+struct del_node_t;
+struct ins_node_t;
+struct sel_node_t;
+struct open_node_t;
+struct fetch_node_t;
+
+struct row_printf_node_t;
+struct sel_buf_t;
+
+struct undo_node_t;
+
+struct purge_node_t;
+
+struct row_ext_t;
+
+/** Buffer for logging modifications during online index creation */
+struct row_log_t;
+
+/* MySQL data types */
+struct TABLE;
+
+#endif
diff --git a/storage/innobase/include/row0uins.h b/storage/innobase/include/row0uins.h
new file mode 100644
index 00000000000..ebf4881208a
--- /dev/null
+++ b/storage/innobase/include/row0uins.h
@@ -0,0 +1,54 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0uins.h
+Fresh insert undo
+
+Created 2/25/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0uins_h
+#define row0uins_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+#include "row0types.h"
+#include "mtr0mtr.h"
+
+/***********************************************************//**
+Undoes a fresh insert of a row to a table. A fresh insert means that
+the same clustered index unique key did not have any record, not even a
+delete marked one, at the time of the insert. InnoDB is eager in a rollback:
+if it figures out that an index record will be removed in the purge
+anyway, it will remove it already in the rollback.
+@return	DB_SUCCESS; the return value must not be ignored */
+UNIV_INTERN
+dberr_t
+row_undo_ins(
+/*=========*/
+	undo_node_t*	node)	/*!< in: row undo node, must not be NULL */
+	__attribute__((nonnull, warn_unused_result));
+#ifndef UNIV_NONINL
+#include "row0uins.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/row0uins.ic b/storage/innobase/include/row0uins.ic
new file mode 100644
index 00000000000..54da2e49874
--- /dev/null
+++ b/storage/innobase/include/row0uins.ic
@@ -0,0 +1,25 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0uins.ic
+Fresh insert undo
+
+Created 2/25/1997 Heikki Tuuri
+*******************************************************/
+
diff --git a/storage/innobase/include/row0umod.h b/storage/innobase/include/row0umod.h
new file mode 100644
index 00000000000..f89d5a334fc
--- /dev/null
+++ b/storage/innobase/include/row0umod.h
@@ -0,0 +1,52 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0umod.h
+Undo modify of a row
+
+Created 2/27/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0umod_h
+#define row0umod_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+#include "row0types.h"
+#include "mtr0mtr.h"
+
+/***********************************************************//**
+Undoes a modify operation on a row of a table. No argument may be NULL.
+@return	DB_SUCCESS or error code; the return value must not be ignored */
+UNIV_INTERN
+dberr_t
+row_undo_mod(
+/*=========*/
+	undo_node_t*	node,	/*!< in: row undo node */
+	que_thr_t*	thr)	/*!< in: query thread */
+	__attribute__((nonnull, warn_unused_result));
+
+#ifndef UNIV_NONINL
+#include "row0umod.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/row0umod.ic b/storage/innobase/include/row0umod.ic
new file mode 100644
index 00000000000..00a8cd86e01
--- /dev/null
+++ b/storage/innobase/include/row0umod.ic
@@ -0,0 +1,24 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0umod.ic
+Undo modify of a row
+
+Created 2/27/1997 Heikki Tuuri
+*******************************************************/
diff --git a/storage/innobase/include/row0undo.h b/storage/innobase/include/row0undo.h
new file mode 100644
index 00000000000..5dddfb4eae1
--- /dev/null
+++ b/storage/innobase/include/row0undo.h
@@ -0,0 +1,135 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0undo.h
+Row undo
+
+Created 1/8/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0undo_h
+#define row0undo_h
+
+#include "univ.i"
+#include "mtr0mtr.h"
+#include "trx0sys.h"
+#include "btr0types.h"
+#include "btr0pcur.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+#include "row0types.h"
+
+/********************************************************************//**
+Creates a row undo node for a query graph.
+@return	own: the new undo node */
+UNIV_INTERN
+undo_node_t*
+row_undo_node_create(
+/*=================*/
+	trx_t*		trx,	/*!< in: transaction */
+	que_thr_t*	parent,	/*!< in: parent node, i.e., a thr node */
+	mem_heap_t*	heap);	/*!< in: memory heap where the node is created */
+/***********************************************************//**
+Looks for the clustered index record when node has the row reference.
+The search uses the pcur in node. If the record is found, the row is stored
+in node, the position of pcur is stored, and pcur is detached. The caller
+must close the pcur in every case.
+@return	TRUE if found; NOTE that node->pcur must be closed by the
+caller, regardless of the return value */
+UNIV_INTERN
+ibool
+row_undo_search_clust_to_pcur(
+/*==========================*/
+	undo_node_t*	node);	/*!< in/out: row undo node */
+/***********************************************************//**
+Undoes a row operation in a table. This is a high-level function which is
+used in SQL execution graphs.
+@return	query thread to run next, or NULL */
+UNIV_INTERN
+que_thr_t*
+row_undo_step(
+/*==========*/
+	que_thr_t*	thr);	/*!< in: query thread */
+
+/* A single query thread will try to perform the undo for all successive
+versions of a clustered index record, if the transaction has modified it
+several times during the execution which is rolled back. It may happen
+that the task is transferred to another query thread, if the other thread
+is assigned to handle an undo log record in the chain of different versions
+of the record, and the other thread happens to get the x-latch to the
+clustered index record at the right time.
+	If a query thread notices that the clustered index record it is looking
+for is missing, or the roll ptr field in the record does not point to the
+undo log record the thread was assigned to handle, then it gives up the undo
+task for that undo log record, and fetches the next. This situation can occur
+just in the case where the transaction modified the same record several times
+and another thread is currently doing the undo for successive versions of
+that index record. */
+
+/** Execution state of an undo node; the values start from 1 */
+enum undo_exec {
+	UNDO_NODE_FETCH_NEXT = 1,	/*!< we should fetch the next
+					undo log record */
+	UNDO_NODE_INSERT,		/*!< undo a fresh insert of a
+					row into a table */
+	UNDO_NODE_MODIFY		/*!< undo a modify operation
+					(DELETE or UPDATE) on a row
+					of a table */
+};
+
+/** Undo node structure of a query graph */
+struct undo_node_t{
+	que_common_t	common;	/*!< node type: QUE_NODE_UNDO */
+	enum undo_exec	state;	/*!< node execution state */
+	trx_t*		trx;	/*!< trx for which undo is done */
+	roll_ptr_t	roll_ptr;/*!< roll pointer to undo log record */
+	trx_undo_rec_t*	undo_rec;/*!< undo log record */
+	undo_no_t	undo_no;/*!< undo number of the record */
+	ulint		rec_type;/*!< undo log record type: TRX_UNDO_INSERT_REC,
+				... */
+	trx_id_t	new_trx_id; /*!< trx id to restore to the clustered
+				index record */
+	btr_pcur_t	pcur;	/*!< persistent cursor used in searching the
+				clustered index record */
+	dict_table_t*	table;	/*!< table where undo is done */
+	ulint		cmpl_info;/*!< compiler analysis of an update */
+	upd_t*		update;	/*!< update vector for a clustered index
+				record */
+	dtuple_t*	ref;	/*!< row reference to the next row to handle */
+	dtuple_t*	row;	/*!< a copy (fields are also copied to heap) of
+				the row to handle */
+	row_ext_t*	ext;	/*!< NULL, or prefixes of the externally
+				stored columns of the row */
+	dtuple_t*	undo_row;/*!< NULL, or the row after undo */
+	row_ext_t*	undo_ext;/*!< NULL, or prefixes of the externally
+				stored columns of undo_row */
+	dict_index_t*	index;	/*!< the next index whose record should be
+				handled */
+	mem_heap_t*	heap;	/*!< memory heap used as auxiliary storage for
+				row; this must be emptied after undo has been
+				tried on a row */
+};
+
+
+#ifndef UNIV_NONINL
+#include "row0undo.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/row0undo.ic b/storage/innobase/include/row0undo.ic
new file mode 100644
index 00000000000..b97ffca590e
--- /dev/null
+++ b/storage/innobase/include/row0undo.ic
@@ -0,0 +1,24 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0undo.ic
+Row undo
+
+Created 1/8/1997 Heikki Tuuri
+*******************************************************/
diff --git a/storage/innobase/include/row0upd.h b/storage/innobase/include/row0upd.h
new file mode 100644
index 00000000000..27dedeb65a7
--- /dev/null
+++ b/storage/innobase/include/row0upd.h
@@ -0,0 +1,540 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0upd.h
+Update of a row
+
+Created 12/27/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0upd_h
+#define row0upd_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "row0types.h"
+#include "btr0types.h"
+#include "dict0types.h"
+#include "trx0types.h"
+
+#ifndef UNIV_HOTBACKUP
+# include "btr0pcur.h"
+# include "que0types.h"
+# include "pars0types.h"
+#endif /* !UNIV_HOTBACKUP */
+
+/*********************************************************************//**
+Creates an update vector object.
+@return	own: update vector object */
+UNIV_INLINE
+upd_t*
+upd_create(
+/*=======*/
+	ulint		n,	/*!< in: number of fields */
+	mem_heap_t*	heap);	/*!< in: heap from which memory allocated */
+/*********************************************************************//**
+Returns the number of fields in the update vector == number of columns
+to be updated by an update vector.
+@return	number of fields */
+UNIV_INLINE
+ulint
+upd_get_n_fields(
+/*=============*/
+	const upd_t*	update);	/*!< in: update vector */
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Returns the nth field of an update vector.
+@return	update vector field */
+UNIV_INLINE
+upd_field_t*
+upd_get_nth_field(
+/*==============*/
+	const upd_t*	update,	/*!< in: update vector */
+	ulint		n);	/*!< in: field position in update vector */
+#else
+# define upd_get_nth_field(update, n) ((update)->fields + (n))
+#endif
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+Sets the index field number that an update vector field will update. */
+UNIV_INLINE
+void
+upd_field_set_field_no(
+/*===================*/
+	upd_field_t*	upd_field,	/*!< in/out: update vector field */
+	ulint		field_no,	/*!< in: field number in a clustered
+					index */
+	dict_index_t*	index,		/*!< in: index */
+	trx_t*		trx);		/*!< in: trx, for diagnostics */
+/*********************************************************************//**
+Returns a field of an update vector by field_no.
+@return	update vector field, or NULL */
+UNIV_INLINE
+const upd_field_t*
+upd_get_field_by_field_no(
+/*======================*/
+	const upd_t*	update,	/*!< in: update vector */
+	ulint		no)	/*!< in: field_no */
+	__attribute__((nonnull, pure));
+/*********************************************************************//**
+Writes into the redo log the values of trx id and roll ptr and enough info
+to determine their positions within a clustered index record.
+@return	new pointer to mlog */
+UNIV_INTERN
+byte*
+row_upd_write_sys_vals_to_log(
+/*==========================*/
+	dict_index_t*	index,	/*!< in: clustered index */
+	trx_id_t	trx_id,	/*!< in: transaction id */
+	roll_ptr_t	roll_ptr,/*!< in: roll ptr of the undo log record */
+	byte*		log_ptr,/*!< in: pointer to a buffer of size > 20
+				opened in mlog */
+	mtr_t*		mtr);	/*!< in: mtr */
+/*********************************************************************//**
+Updates the trx id and roll ptr field in a clustered index record when
+a row is updated or marked deleted. */
+UNIV_INLINE
+void
+row_upd_rec_sys_fields(
+/*===================*/
+	rec_t*		rec,	/*!< in/out: record */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be updated, or NULL */
+	dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
+	const trx_t*	trx,	/*!< in: transaction */
+	roll_ptr_t	roll_ptr);/*!< in: roll ptr of the undo log record,
+				  can be 0 during IMPORT */
+/*********************************************************************//**
+Sets the trx id or roll ptr field of a clustered index entry. */
+UNIV_INTERN
+void
+row_upd_index_entry_sys_field(
+/*==========================*/
+	dtuple_t*	entry,	/*!< in/out: index entry, where the memory
+				buffers for sys fields are already allocated:
+				the function just copies the new values to
+				them */
+	dict_index_t*	index,	/*!< in: clustered index */
+	ulint		type,	/*!< in: DATA_TRX_ID or DATA_ROLL_PTR */
+	ib_uint64_t	val);	/*!< in: value to write */
+/*********************************************************************//**
+Creates an update node for a query graph.
+@return	own: update node */
+UNIV_INTERN
+upd_node_t*
+upd_node_create(
+/*============*/
+	mem_heap_t*	heap);	/*!< in: mem heap where created */
+/***********************************************************//**
+Writes to the redo log the new values of the fields occurring in the index. */
+UNIV_INTERN
+void
+row_upd_index_write_log(
+/*====================*/
+	const upd_t*	update,	/*!< in: update vector */
+	byte*		log_ptr,/*!< in: pointer to mlog buffer: must
+				contain at least MLOG_BUF_MARGIN bytes
+				of free space; the buffer is closed
+				within this function */
+	mtr_t*		mtr);	/*!< in: mtr into whose log to write */
+/***********************************************************//**
+Returns TRUE if row update changes size of some field in index or if some
+field to be updated is stored externally in rec or update.
+@return TRUE if the update changes the size of some field in index or
+the field is external in rec or update */
+UNIV_INTERN
+ibool
+row_upd_changes_field_size_or_external(
+/*===================================*/
+	dict_index_t*	index,	/*!< in: index */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
+	const upd_t*	update);/*!< in: update vector */
+/***********************************************************//**
+Returns true if row update contains disowned external fields.
+@return true if the update contains disowned external fields. */
+UNIV_INTERN
+bool
+row_upd_changes_disowned_external(
+/*==============================*/
+	const upd_t*	update)	/*!< in: update vector */
+	__attribute__((nonnull, warn_unused_result));
+#endif /* !UNIV_HOTBACKUP */
+/***********************************************************//**
+Applies the new column values stored in the update vector to the
+given record. No field size changes are allowed. This function is
+usually invoked on a clustered index. The only use case for a
+secondary index is row_ins_sec_index_entry_by_modify() or its
+counterpart in ibuf_insert_to_index_page(). */
+UNIV_INTERN
+void
+row_upd_rec_in_place(
+/*=================*/
+	rec_t*		rec,	/*!< in/out: record where replaced */
+	dict_index_t*	index,	/*!< in: the index the record belongs to */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	const upd_t*	update,	/*!< in: update vector */
+	page_zip_des_t*	page_zip);/*!< in: compressed page with enough space
+				available, or NULL */
+#ifndef UNIV_HOTBACKUP
+/***************************************************************//**
+Builds an update vector from those fields which in a secondary index entry
+differ from a record that has the equal ordering fields. NOTE: we compare
+the fields as binary strings!
+@return	own: update vector of differing fields */
+UNIV_INTERN
+upd_t*
+row_upd_build_sec_rec_difference_binary(
+/*====================================*/
+	const rec_t*	rec,	/*!< in: secondary index record */
+	dict_index_t*	index,	/*!< in: index */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
+	const dtuple_t*	entry,	/*!< in: entry to insert */
+	mem_heap_t*	heap)	/*!< in: memory heap from which allocated */
+	__attribute__((warn_unused_result, nonnull));
+/***************************************************************//**
+Builds an update vector from those fields, excluding the roll ptr and
+trx id fields, which in an index entry differ from a record that has
+the equal ordering fields. NOTE: we compare the fields as binary strings!
+@return own: update vector of differing fields, excluding roll ptr and
+trx id */
+UNIV_INTERN
+const upd_t*
+row_upd_build_difference_binary(
+/*============================*/
+	dict_index_t*	index,	/*!< in: clustered index */
+	const dtuple_t*	entry,	/*!< in: entry to insert */
+	const rec_t*	rec,	/*!< in: clustered index record */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec,index), or NULL */
+	bool		no_sys,	/*!< in: skip the system columns
+				DB_TRX_ID and DB_ROLL_PTR */
+	trx_t*		trx,	/*!< in: transaction (for diagnostics),
+				or NULL */
+	mem_heap_t*	heap)	/*!< in: memory heap from which allocated */
+	__attribute__((nonnull(1,2,3,7), warn_unused_result));
+/***********************************************************//**
+Copies the new column values stored in the update vector into the given
+index entry. */
+UNIV_INTERN
+void
+row_upd_index_replace_new_col_vals_index_pos(
+/*=========================================*/
+	dtuple_t*	entry,	/*!< in/out: index entry where replaced;
+				the clustered index record must be
+				covered by a lock or a page latch to
+				prevent deletion (rollback or purge) */
+	dict_index_t*	index,	/*!< in: index; NOTE that this may also be a
+				non-clustered index */
+	const upd_t*	update,	/*!< in: an update vector built for the index so
+				that the field number in an upd_field is the
+				index position */
+	ibool		order_only,
+				/*!< in: if TRUE, limit the replacement to
+				ordering fields of index; note that this
+				does not work for non-clustered indexes. */
+	mem_heap_t*	heap)	/*!< in: memory heap for allocating and
+				copying the new values */
+	__attribute__((nonnull));
+/***********************************************************//**
+Copies the new column values stored in a clustered index update vector
+into the given index entry. */
+UNIV_INTERN
+void
+row_upd_index_replace_new_col_vals(
+/*===============================*/
+	dtuple_t*	entry,	/*!< in/out: index entry where replaced;
+				the clustered index record must be
+				covered by a lock or a page latch to
+				prevent deletion (rollback or purge) */
+	dict_index_t*	index,	/*!< in: index; NOTE that this may also be a
+				non-clustered index */
+	const upd_t*	update,	/*!< in: an update vector built for the
+				CLUSTERED index so that the field number in
+				an upd_field is the clustered index position */
+	mem_heap_t*	heap)	/*!< in: memory heap for allocating and
+				copying the new values */
+	__attribute__((nonnull));
+/***********************************************************//**
+Replaces the new column values stored in the update vector. */
+UNIV_INTERN
+void
+row_upd_replace(
+/*============*/
+	dtuple_t*		row,	/*!< in/out: row where replaced,
+					indexed by col_no;
+					the clustered index record must be
+					covered by a lock or a page latch to
+					prevent deletion (rollback or purge) */
+	row_ext_t**		ext,	/*!< out, own: NULL, or externally
+					stored column prefixes */
+	const dict_index_t*	index,	/*!< in: clustered index */
+	const upd_t*		update,	/*!< in: an update vector built for the
+					clustered index */
+	mem_heap_t*		heap);	/*!< in: memory heap */
+/***********************************************************//**
+Checks if an update vector changes an ordering field of an index record.
+
+This function is fast if the update vector is short or the number of ordering
+fields in the index is small. Otherwise, this can be quadratic.
+NOTE: we compare the fields as binary strings!
+@return TRUE if update vector changes an ordering field in the index record */
+UNIV_INTERN
+ibool
+row_upd_changes_ord_field_binary_func(
+/*==================================*/
+	dict_index_t*	index,	/*!< in: index of the record */
+	const upd_t*	update,	/*!< in: update vector for the row; NOTE: the
+				field numbers in this MUST be clustered index
+				positions! */
+#ifdef UNIV_DEBUG
+	const que_thr_t*thr,	/*!< in: query thread (debug builds only) */
+#endif /* UNIV_DEBUG */
+	const dtuple_t*	row,	/*!< in: old value of row, or NULL if the
+				row and the data values in update are not
+				known when this function is called, e.g., at
+				compile time */
+	const row_ext_t*ext)	/*!< in: NULL, or prefixes of the externally
+				stored columns in the old row */
+	__attribute__((nonnull(1,2), warn_unused_result));
+#ifdef UNIV_DEBUG
+# define row_upd_changes_ord_field_binary(index,update,thr,row,ext)	\
+	row_upd_changes_ord_field_binary_func(index,update,thr,row,ext)
+#else /* UNIV_DEBUG */
+# define row_upd_changes_ord_field_binary(index,update,thr,row,ext)	\
+	row_upd_changes_ord_field_binary_func(index,update,row,ext)
+#endif /* UNIV_DEBUG */
+/***********************************************************//**
+Checks if an FTS indexed column is affected by an UPDATE.
+@return offset within fts_t::indexes if FTS indexed column updated else
+ULINT_UNDEFINED */
+UNIV_INTERN
+ulint
+row_upd_changes_fts_column(
+/*=======================*/
+	dict_table_t*	table,		/*!< in: table */
+	upd_field_t*	upd_field);	/*!< in: field to check */
+/***********************************************************//**
+Checks if an FTS Doc ID column is affected by an UPDATE.
+@return whether Doc ID column is affected */
+UNIV_INTERN
+bool
+row_upd_changes_doc_id(
+/*===================*/
+	dict_table_t*	table,		/*!< in: table */
+	upd_field_t*	upd_field)	/*!< in: field to check */
+	__attribute__((nonnull, warn_unused_result));
+/***********************************************************//**
+Checks if an update vector changes an ordering field of an index record.
+This function is fast if the update vector is short or the number of ordering
+fields in the index is small. Otherwise, this can be quadratic.
+NOTE: we compare the fields as binary strings!
+@return TRUE if update vector may change an ordering field in an index
+record */
+UNIV_INTERN
+ibool
+row_upd_changes_some_index_ord_field_binary(
+/*========================================*/
+	const dict_table_t*	table,	/*!< in: table */
+	const upd_t*		update);/*!< in: update vector for the row */
+/***********************************************************//**
+Updates a row in a table. This is a high-level function used
+in SQL execution graphs.
+@return	query thread to run next or NULL */
+UNIV_INTERN
+que_thr_t*
+row_upd_step(
+/*=========*/
+	que_thr_t*	thr);	/*!< in: query thread */
+#endif /* !UNIV_HOTBACKUP */
+/*********************************************************************//**
+Parses the log data of system field values.
+@return	log data end or NULL */
+UNIV_INTERN
+byte*
+row_upd_parse_sys_vals(
+/*===================*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	ulint*		pos,	/*!< out: TRX_ID position in record */
+	trx_id_t*	trx_id,	/*!< out: trx id */
+	roll_ptr_t*	roll_ptr);/*!< out: roll ptr */
+/*********************************************************************//**
+Updates the trx id and roll ptr field in a clustered index record in database
+recovery. */
+UNIV_INTERN
+void
+row_upd_rec_sys_fields_in_recovery(
+/*===============================*/
+	rec_t*		rec,	/*!< in/out: record */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
+	ulint		pos,	/*!< in: TRX_ID position in rec */
+	trx_id_t	trx_id,	/*!< in: transaction id */
+	roll_ptr_t	roll_ptr);/*!< in: roll ptr of the undo log record */
+/*********************************************************************//**
+Parses the log data written by row_upd_index_write_log.
+@return	log data end or NULL */
+UNIV_INTERN
+byte*
+row_upd_index_parse(
+/*================*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr,/*!< in: buffer end */
+	mem_heap_t*	heap,	/*!< in: memory heap where update vector is
+				built */
+	upd_t**		update_out);/*!< out: update vector */
+
+
+/** Update vector field: the new value for one field of an index */
+struct upd_field_t{
+	unsigned	field_no:16;	/*!< field number in an index, usually
+					the clustered index, but in updating
+					a secondary index record in btr0cur.cc
+					this is the position in the secondary
+					index */
+#ifndef UNIV_HOTBACKUP
+	unsigned	orig_len:16;	/*!< original length of the locally
+					stored part of an externally stored
+					column, or 0 */
+	que_node_t*	exp;		/*!< expression for calculating a new
+					value: it refers to column values and
+					constants in the symbol table of the
+					query graph */
+#endif /* !UNIV_HOTBACKUP */
+	dfield_t	new_val;	/*!< new value for the column */
+};
+
+/** Update vector: info bits plus an array of n_fields update fields */
+struct upd_t{
+	ulint		info_bits;	/*!< new value of info bits to record;
+					default is 0 */
+	ulint		n_fields;	/*!< number of update fields */
+	upd_field_t*	fields;		/*!< array of update fields */
+};
+
+#ifndef UNIV_HOTBACKUP
+/** Update node structure which also implements the delete operation
+of a row */
+
+struct upd_node_t{
+	que_common_t	common;	/*!< node type: QUE_NODE_UPDATE */
+	ibool		is_delete;/*!< TRUE if delete, FALSE if update */
+	ibool		searched_update;
+				/*!< TRUE if searched update, FALSE if
+				positioned */
+	ibool		in_mysql_interface;
+				/*!< TRUE if the update node was created
+				for the MySQL interface */
+	dict_foreign_t*	foreign;/*!< NULL or pointer to a foreign key
+				constraint if this update node is used in
+				doing an ON DELETE or ON UPDATE operation */
+	upd_node_t*	cascade_node;/*!< NULL or an update node template which
+				is used to implement ON DELETE/UPDATE CASCADE
+				or ... SET NULL for foreign keys */
+	mem_heap_t*	cascade_heap;/*!< NULL or a mem heap where the cascade
+				node is created */
+	sel_node_t*	select;	/*!< query graph subtree implementing a base
+				table cursor: the rows returned will be
+				updated */
+	btr_pcur_t*	pcur;	/*!< persistent cursor placed on the clustered
+				index record which should be updated or
+				deleted; the cursor is stored in the graph
+				of 'select' field above, except in the case
+				of the MySQL interface */
+	dict_table_t*	table;	/*!< table where updated */
+	upd_t*		update;	/*!< update vector for the row */
+	ulint		update_n_fields;
+				/*!< when this struct is used to implement
+				a cascade operation for foreign keys, we store
+				here the size of the buffer allocated for use
+				as the update vector */
+	sym_node_list_t	columns;/*!< symbol table nodes for the columns
+				to retrieve from the table */
+	ibool		has_clust_rec_x_lock;
+				/*!< TRUE if the select which retrieves the
+				records to update already sets an x-lock on
+				the clustered record; note that it must always
+				set at least an s-lock */
+	ulint		cmpl_info;/*!< information extracted during query
+				compilation; speeds up execution:
+				UPD_NODE_NO_ORD_CHANGE and
+				UPD_NODE_NO_SIZE_CHANGE, ORed */
+	/*----------------------*/
+	/* Local storage for this graph node */
+	ulint		state;	/*!< node execution state */
+	dict_index_t*	index;	/*!< NULL, or the next index whose record should
+				be updated */
+	dtuple_t*	row;	/*!< NULL, or a copy (also fields copied to
+				heap) of the row to update; this must be reset
+				to NULL after a successful update */
+	row_ext_t*	ext;	/*!< NULL, or prefixes of the externally
+				stored columns in the old row */
+	dtuple_t*	upd_row;/*!< NULL, or a copy of the updated row */
+	row_ext_t*	upd_ext;/*!< NULL, or prefixes of the externally
+				stored columns in upd_row */
+	mem_heap_t*	heap;	/*!< memory heap used as auxiliary storage;
+				this must be emptied after a successful
+				update */
+	/*----------------------*/
+	sym_node_t*	table_sym;/*!< table node in symbol table */
+	que_node_t*	col_assign_list;
+				/*!< column assignment list */
+	ulint		magic_n;
+};
+
+#define	UPD_NODE_MAGIC_N	1579975
+
+/* Node execution states */
+#define UPD_NODE_SET_IX_LOCK	   1	/* execution came to the node from
+					a node above and if the field
+					has_clust_rec_x_lock is FALSE, we
+					should set an intention x-lock on
+					the table */
+#define UPD_NODE_UPDATE_CLUSTERED  2	/* clustered index record should be
+					updated */
+#define UPD_NODE_INSERT_CLUSTERED  3	/* clustered index record should be
+					inserted, old record is already delete
+					marked */
+#define UPD_NODE_INSERT_BLOB	   4	/* clustered index record should be
+					inserted, old record is already
+					delete-marked; non-updated BLOBs
+					should be inherited by the new record
+					and disowned by the old record */
+#define UPD_NODE_UPDATE_ALL_SEC	   5	/* an ordering field of the clustered
+					index record was changed, or this is
+					a delete operation: should update
+					all the secondary index records */
+#define UPD_NODE_UPDATE_SOME_SEC   6	/* secondary index entries should be
+					looked at and updated if an ordering
+					field changed */
+
+/* Compilation info flags: these must fit within 3 bits; see trx0rec.h */
+#define UPD_NODE_NO_ORD_CHANGE	1	/* no secondary index record will be
+					changed in the update and no ordering
+					field of the clustered index */
+#define UPD_NODE_NO_SIZE_CHANGE	2	/* no record field size will be
+					changed in the update */
+
+#endif /* !UNIV_HOTBACKUP */
+
+#ifndef UNIV_NONINL
+#include "row0upd.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/row0upd.ic b/storage/innobase/include/row0upd.ic
new file mode 100644
index 00000000000..618a77fa4bf
--- /dev/null
+++ b/storage/innobase/include/row0upd.ic
@@ -0,0 +1,188 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0upd.ic
+Update of a row
+
+Created 12/27/1996 Heikki Tuuri
+*******************************************************/
+
+#include "mtr0log.h"
+#ifndef UNIV_HOTBACKUP
+# include "trx0trx.h"
+# include "trx0undo.h"
+# include "row0row.h"
+# include "lock0lock.h"
+#endif /* !UNIV_HOTBACKUP */
+#include "page0zip.h"
+
+/*********************************************************************//**
+Builds an update vector with room for n fields. Both the vector and its
+field array come from mem_heap_zalloc() on the given heap.
+@return	own: update vector object */
+UNIV_INLINE
+upd_t*
+upd_create(
+/*=======*/
+	ulint		n,	/*!< in: number of fields */
+	mem_heap_t*	heap)	/*!< in: heap from which memory allocated */
+{
+	upd_t*		vec;
+	upd_field_t*	arr;
+
+	vec = (upd_t*) mem_heap_zalloc(heap, sizeof(*vec));
+	arr = (upd_field_t*) mem_heap_zalloc(heap, n * sizeof(*arr));
+
+	vec->fields = arr;
+	vec->n_fields = n;
+	return(vec);
+}
+
+/*********************************************************************//**
+Reads the field count of an update vector, i.e. the number of columns
+that the update vector will update.
+@return	number of fields */
+UNIV_INLINE
+ulint
+upd_get_n_fields(
+/*=============*/
+	const upd_t*	update)	/*!< in: update vector */
+{
+	ut_ad(update);
+	const ulint	n_fields = update->n_fields;
+	return(n_fields);
+}
+
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Returns the nth field of an update vector, asserting that n is in range.
+@return	update vector field */
+UNIV_INLINE
+upd_field_t*
+upd_get_nth_field(
+/*==============*/
+	const upd_t*	update,	/*!< in: update vector */
+	ulint		n)	/*!< in: field position in update vector */
+{
+	ut_ad(update);
+	ut_ad(n < update->n_fields);
+	upd_field_t*	nth = &update->fields[n];
+	return(nth);
+}
+#endif /* UNIV_DEBUG */
+
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+Records in an update vector field which index field it will update. */
+UNIV_INLINE
+void
+upd_field_set_field_no(
+/*===================*/
+	upd_field_t*	upd_field,	/*!< in/out: update vector field */
+	ulint		field_no,	/*!< in: clustered index field no */
+	dict_index_t*	index,		/*!< in: index */
+	trx_t*		trx)		/*!< in: transaction */
+{
+	const ulint	n_fields = dict_index_get_n_fields(index);
+	upd_field->field_no = field_no;
+	upd_field->orig_len = 0;
+
+	if (field_no >= n_fields) {
+		fprintf(stderr,
+			"InnoDB: Error: trying to access field %lu in ",
+			(ulong) field_no);
+		dict_index_name_print(stderr, trx, index);
+		fprintf(stderr, "\n"
+			"InnoDB: but index only has %lu fields\n",
+			(ulong) n_fields);
+		ut_ad(0);
+	}
+
+	dict_col_copy_type(dict_index_get_nth_col(index, field_no),
+			   dfield_get_type(&upd_field->new_val));
+}
+
+/*********************************************************************//**
+Searches an update vector for the field whose field_no equals no.
+@return	first matching update vector field, or NULL */
+UNIV_INLINE
+const upd_field_t*
+upd_get_field_by_field_no(
+/*======================*/
+	const upd_t*	update,	/*!< in: update vector */
+	ulint		no)	/*!< in: field_no */
+{
+	const ulint	n_fields = upd_get_n_fields(update);
+
+	for (ulint i = 0; i != n_fields; ++i) {
+		const upd_field_t*	field = upd_get_nth_field(update, i);
+
+		if (field->field_no != no) {
+			continue;
+		}
+		return(field);
+	}
+	return(NULL);
+}
+
+/*********************************************************************//**
+Writes the transaction id and roll pointer into a clustered index record
+when a row is updated or marked deleted. */
+UNIV_INLINE
+void
+row_upd_rec_sys_fields(
+/*===================*/
+	rec_t*		rec,	/*!< in/out: record */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
+				uncompressed part will be updated, or NULL */
+	dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
+	const trx_t*	trx,	/*!< in: transaction */
+	roll_ptr_t	roll_ptr)/*!< in: roll ptr of the undo log record,
+				 can be 0 during IMPORT */
+{
+	ut_ad(dict_index_is_clust(index));
+	ut_ad(rec_offs_validate(rec, index, offsets));
+
+#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
+# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
+#endif
+	if (page_zip != NULL) {
+		const ulint	pos
+			= dict_index_get_sys_col_pos(index, DATA_TRX_ID);
+		page_zip_write_trx_id_and_roll_ptr(page_zip, rec, offsets,
+						   pos, trx->id, roll_ptr);
+		return;
+	}
+
+	ulint	offset = index->trx_id_offset;
+	if (offset == 0) {
+		offset = row_get_trx_id_offset(index, offsets);
+	}
+
+	/* During IMPORT the trx id in the record can be in the future,
+	if the .ibd file is being imported from another instance.
+	During IMPORT roll_ptr will be 0. */
+	ut_ad(roll_ptr == 0
+	      || lock_check_trx_id_sanity(trx_read_trx_id(rec + offset),
+					  rec, index, offsets));
+
+	trx_write_trx_id(rec + offset, trx->id);
+	trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN, roll_ptr);
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/row0vers.h b/storage/innobase/include/row0vers.h
new file mode 100644
index 00000000000..1df5b4d3e98
--- /dev/null
+++ b/storage/innobase/include/row0vers.h
@@ -0,0 +1,146 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0vers.h
+Row versions
+
+Created 2/6/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0vers_h
+#define row0vers_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+#include "rem0types.h"
+#include "mtr0mtr.h"
+#include "read0types.h"
+
+/*****************************************************************//**
+Finds out if an active transaction has inserted or modified a secondary
+index record.
+@return 0 if committed, else the active transaction id;
+NOTE that this function can return false positives but never false
+negatives. The caller must confirm all positive results by calling
+trx_is_active() while holding lock_sys->mutex. */
+UNIV_INTERN
+trx_id_t
+row_vers_impl_x_locked(
+/*===================*/
+	const rec_t*	rec,	/*!< in: record in a secondary index */
+	dict_index_t*	index,	/*!< in: the secondary index */
+	const ulint*	offsets);/*!< in: rec_get_offsets(rec, index) */
+/*****************************************************************//**
+Finds out if we must preserve a delete marked earlier version of a clustered
+index record, because it is >= the purge view.
+@return	TRUE if earlier version should be preserved */
+UNIV_INTERN
+ibool
+row_vers_must_preserve_del_marked(
+/*==============================*/
+	trx_id_t	trx_id,	/*!< in: transaction id in the version */
+	mtr_t*		mtr);	/*!< in: mtr holding the latch on the
+				clustered index record; it will also
+				hold the latch on purge_view */
+/*****************************************************************//**
+Finds out if a version of the record, where the version >= the current
+purge view, should have ientry as its secondary index entry. We check
+if there is any not delete marked version of the record where the trx
+id >= purge view, and the secondary index entry == ientry; exactly in
+this case we return TRUE.
+@return	TRUE if an earlier version should have ientry */
+UNIV_INTERN
+ibool
+row_vers_old_has_index_entry(
+/*=========================*/
+	ibool		also_curr,/*!< in: TRUE if also rec is included in the
+				versions to search; otherwise only versions
+				prior to it are searched */
+	const rec_t*	rec,	/*!< in: record in the clustered index; the
+				caller must have a latch on the page */
+	mtr_t*		mtr,	/*!< in: mtr holding the latch on rec; it will
+				also hold the latch on purge_view */
+	dict_index_t*	index,	/*!< in: the secondary index */
+	const dtuple_t*	ientry);/*!< in: the secondary index entry */
+/*****************************************************************//**
+Constructs the version of a clustered index record which a consistent
+read should see. We assume that the trx id stored in rec is such that
+the consistent read should not see rec in its present version.
+@return	DB_SUCCESS or DB_MISSING_HISTORY */
+UNIV_INTERN
+dberr_t
+row_vers_build_for_consistent_read(
+/*===============================*/
+	const rec_t*	rec,	/*!< in: record in a clustered index; the
+				caller must have a latch on the page; this
+				latch locks the top of the stack of versions
+				of this record */
+	mtr_t*		mtr,	/*!< in: mtr holding the latch on rec; it will
+				also hold the latch on purge_view */
+	dict_index_t*	index,	/*!< in: the clustered index */
+	ulint**		offsets,/*!< in/out: offsets returned by
+				rec_get_offsets(rec, index) */
+	read_view_t*	view,	/*!< in: the consistent read view */
+	mem_heap_t**	offset_heap,/*!< in/out: memory heap from which
+				the offsets are allocated */
+	mem_heap_t*	in_heap,/*!< in: memory heap from which the memory for
+				*old_vers is allocated; memory for possible
+				intermediate versions is allocated and freed
+				locally within the function */
+	rec_t**		old_vers)/*!< out, own: old version, or NULL
+				if the history is missing or the record
+				does not exist in the view, that is,
+				it was freshly inserted afterwards */
+	__attribute__((nonnull(1,2,3,4,5,6,7)));
+
+/*****************************************************************//**
+Constructs the last committed version of a clustered index record,
+which should be seen by a semi-consistent read. */
+UNIV_INTERN
+void
+row_vers_build_for_semi_consistent_read(
+/*====================================*/
+	const rec_t*	rec,	/*!< in: record in a clustered index; the
+				caller must have a latch on the page; this
+				latch locks the top of the stack of versions
+				of this record */
+	mtr_t*		mtr,	/*!< in: mtr holding the latch on rec */
+	dict_index_t*	index,	/*!< in: the clustered index */
+	ulint**		offsets,/*!< in/out: offsets returned by
+				rec_get_offsets(rec, index) */
+	mem_heap_t**	offset_heap,/*!< in/out: memory heap from which
+				the offsets are allocated */
+	mem_heap_t*	in_heap,/*!< in: memory heap from which the memory for
+				*old_vers is allocated; memory for possible
+				intermediate versions is allocated and freed
+				locally within the function */
+	const rec_t**	old_vers)/*!< out: rec, old version, or NULL if the
+				record does not exist in the view, that is,
+				it was freshly inserted afterwards */
+	__attribute__((nonnull(1,2,3,4,5)));
+
+
+#ifndef UNIV_NONINL
+#include "row0vers.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/row0vers.ic b/storage/innobase/include/row0vers.ic
new file mode 100644
index 00000000000..ef43a55bf70
--- /dev/null
+++ b/storage/innobase/include/row0vers.ic
@@ -0,0 +1,30 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0vers.ic
+Row versions
+
+Created 2/6/1997 Heikki Tuuri
+*******************************************************/
+
+#include "row0row.h"
+#include "dict0dict.h"
+#include "read0read.h"
+#include "page0page.h"
+#include "log0recv.h"
diff --git a/storage/innobase/include/srv0conc.h b/storage/innobase/include/srv0conc.h
new file mode 100644
index 00000000000..cf61ef5528d
--- /dev/null
+++ b/storage/innobase/include/srv0conc.h
@@ -0,0 +1,111 @@
+/*****************************************************************************
+
+Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/srv0conc.h
+
+InnoDB concurrency manager header file
+
+Created 2011/04/18 Sunny Bains
+*******************************************************/
+
+#ifndef srv_conc_h
+#define srv_conc_h
+
+/** We are prepared for a situation that we have this many threads waiting for
+a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the
+value. */
+
+extern	ulint	srv_max_n_threads;
+
+/** The following controls how many threads we let inside InnoDB concurrently:
+threads waiting for locks are not counted into the number because otherwise
+we could get a deadlock. Value of 0 will disable the concurrency check. */
+
+extern ulong	srv_thread_concurrency;
+
+/*********************************************************************//**
+Initialise the concurrency management data structures */
+void
+srv_conc_init(void);
+/*===============*/
+
+/*********************************************************************//**
+Free the concurrency management data structures */
+void
+srv_conc_free(void);
+/*===============*/
+
+/*********************************************************************//**
+Puts an OS thread to wait if there are too many concurrent threads
+(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */
+UNIV_INTERN
+void
+srv_conc_enter_innodb(
+/*==================*/
+	trx_t*	trx);		/*!< in: transaction object associated
+				with the thread */
+
+/*********************************************************************//**
+This lets a thread enter InnoDB regardless of the number of threads inside
+InnoDB. This must be called when a thread ends a lock wait. */
+UNIV_INTERN
+void
+srv_conc_force_enter_innodb(
+/*========================*/
+	trx_t*	trx);		/*!< in: transaction object associated with
+				the thread */
+
+/*********************************************************************//**
+This must be called when a thread exits InnoDB in a lock wait or at the
+end of an SQL statement. */
+UNIV_INTERN
+void
+srv_conc_force_exit_innodb(
+/*=======================*/
+	trx_t*	trx);		/*!< in: transaction object associated with
+				the thread */
+
+/*********************************************************************//**
+Get the count of threads waiting inside InnoDB.
+@return the number of waiting threads */
+UNIV_INTERN
+ulint
+srv_conc_get_waiting_threads(void);
+/*==============================*/
+
+/*********************************************************************//**
+Get the count of threads active inside InnoDB.
+@return the number of active threads */
+UNIV_INTERN
+ulint
+srv_conc_get_active_threads(void);
+/*==============================*/
+
+#endif /* srv_conc_h */
diff --git a/storage/innobase/include/srv0mon.h b/storage/innobase/include/srv0mon.h
new file mode 100644
index 00000000000..e2ab81bf53a
--- /dev/null
+++ b/storage/innobase/include/srv0mon.h
@@ -0,0 +1,896 @@
+/***********************************************************************
+
+Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+***********************************************************************/
+
+/**************************************************//**
+@file include/srv0mon.h
+Server monitor counter related defines
+
+Created 12/15/2009	Jimmy Yang
+*******************************************************/
+
+#ifndef srv0mon_h
+#define srv0mon_h
+
+#include "univ.i"
+#ifndef UNIV_HOTBACKUP
+
+
+/** Possible status values for "mon_status" in "struct monitor_value" */
+enum monitor_running_status {
+	MONITOR_STARTED = 1,	/*!< Monitor has been turned on */
+	MONITOR_STOPPED = 2	/*!< Monitor has been turned off */
+};
+
+typedef enum monitor_running_status	monitor_running_t;
+
+/** Monitor counter value type */
+typedef	ib_int64_t			mon_type_t;
+
+/** Two monitor structures are defined in this file. One is
+"monitor_value_t" which contains dynamic counter values for each
+counter. The other is "monitor_info_t", which contains
+static information (counter name, desc etc.) for each counter.
+In addition, an enum datatype "monitor_id_t" is also defined,
+it identifies each monitor with an internally used symbol, whose
+integer value indexes into above two structure for its dynamic
+and static information.
+Developer who intend to add new counters would require to
+fill in counter information as described in "monitor_info_t" and
+create the internal counter ID in "monitor_id_t". */
+
+/** Structure containing the actual (dynamic) values of a monitor counter.
+One instance exists per monitor_id_t value. */
+struct monitor_value_t {
+	ib_time_t	mon_start_time;	/*!< Start time of monitoring  */
+	ib_time_t	mon_stop_time;	/*!< Stop time of monitoring */
+	ib_time_t	mon_reset_time;	/*!< Time the counter was reset */
+	mon_type_t	mon_value;	/*!< Current counter Value */
+	mon_type_t	mon_max_value;	/*!< Current Max value */
+	mon_type_t	mon_min_value;	/*!< Current Min value */
+	mon_type_t	mon_value_reset;/*!< value at last reset */
+	mon_type_t	mon_max_value_start; /*!< Max value since start */
+	mon_type_t	mon_min_value_start; /*!< Min value since start */
+	mon_type_t	mon_start_value;/*!< Value at the start time */
+	mon_type_t	mon_last_value;	/*!< Last set of values */
+	monitor_running_t mon_status;	/*!< Whether the monitor is still
+					running */
+};
+
+/** The following are the possible values for the "monitor_type" field in
+"struct monitor_info_t". Every non-zero value is a distinct power of two
+(presumably so that several of them can be OR-ed together - confirm
+against the users of monitor_type). */
+enum monitor_type_t {
+	MONITOR_NONE = 0,	/*!< No monitoring */
+	MONITOR_MODULE = 1,	/*!< This is a monitor module type,
+				not a counter */
+	MONITOR_EXISTING = 2,	/*!< The monitor carries information from
+				an existing system status variable */
+	MONITOR_NO_AVERAGE = 4,	/*!< Set this status if we don't want to
+				calculate the average value for the counter */
+	MONITOR_DISPLAY_CURRENT = 8, /*!< Display current value of the
+				counter, rather than incremental value
+				over the period. Mostly for counters
+				displaying current resource usage */
+	MONITOR_GROUP_MODULE = 16, /*!< Monitor can be turned on/off
+				only as a module, but not individually */
+	MONITOR_DEFAULT_ON = 32,/*!< Monitor will be turned on by default at
+				server start up */
+	MONITOR_SET_OWNER = 64,	/*!< Owner of "monitor set", a set of
+				monitor counters */
+	MONITOR_SET_MEMBER = 128,/*!< Being part of a "monitor set" */
+	MONITOR_HIDDEN = 256	/*!< Do not display this monitor in the
+				metrics table */
+};
+
+/** Sentinel start values for the min/max fields of a counter.
+The counter minimum is initialized to MIN_RESERVED, the max value of
+mon_type_t (ib_int64_t), so that the first value compared against it is
+smaller. The counter maximum is initialized to MAX_RESERVED, the bitwise
+complement of MIN_RESERVED. */
+#define	MIN_RESERVED		((mon_type_t) (IB_UINT64_MAX >> 1))
+#define	MAX_RESERVED		(~MIN_RESERVED)
+
+/** This enumeration defines internal monitor identifier used internally
+to identify each particular counter. Its value indexes into two arrays,
+one is the "innodb_counter_value" array which records actual monitor
+counter values, the other is "innodb_counter_info" array which describes
+each counter's basic information (name, desc etc.). A couple of
+naming rules here:
+1) If the monitor defines a module, it starts with MONITOR_MODULE
+2) If the monitor uses existing counters from "status variable", its ID
+name shall start with MONITOR_OVLD
+
+Please refer to "innodb_counter_info" in srv/srv0mon.cc for detail
+information for each monitor counter */
+
+enum monitor_id_t {
+	/* This is to identify the default value set by the metrics
+	control global variables */
+	MONITOR_DEFAULT_START = 0,
+
+	/* Start of Metadata counter */
+	MONITOR_MODULE_METADATA,
+	MONITOR_TABLE_OPEN,
+	MONITOR_TABLE_CLOSE,
+	MONITOR_TABLE_REFERENCE,
+	MONITOR_OVLD_META_MEM_POOL,
+
+	/* Lock manager related counters */
+	MONITOR_MODULE_LOCK,
+	MONITOR_DEADLOCK,
+	MONITOR_TIMEOUT,
+	MONITOR_LOCKREC_WAIT,
+	MONITOR_TABLELOCK_WAIT,
+	MONITOR_NUM_RECLOCK_REQ,
+	MONITOR_RECLOCK_CREATED,
+	MONITOR_RECLOCK_REMOVED,
+	MONITOR_NUM_RECLOCK,
+	MONITOR_TABLELOCK_CREATED,
+	MONITOR_TABLELOCK_REMOVED,
+	MONITOR_NUM_TABLELOCK,
+	MONITOR_OVLD_ROW_LOCK_CURRENT_WAIT,
+	MONITOR_OVLD_LOCK_WAIT_TIME,
+	MONITOR_OVLD_LOCK_MAX_WAIT_TIME,
+	MONITOR_OVLD_ROW_LOCK_WAIT,
+	MONITOR_OVLD_LOCK_AVG_WAIT_TIME,
+
+	/* Buffer and I/O related counters. */
+	MONITOR_MODULE_BUFFER,
+	MONITOR_OVLD_BUFFER_POOL_SIZE,
+	MONITOR_OVLD_BUF_POOL_READS,
+	MONITOR_OVLD_BUF_POOL_READ_REQUESTS,
+	MONITOR_OVLD_BUF_POOL_WRITE_REQUEST,
+	MONITOR_OVLD_BUF_POOL_WAIT_FREE,
+	MONITOR_OVLD_BUF_POOL_READ_AHEAD,
+	MONITOR_OVLD_BUF_POOL_READ_AHEAD_EVICTED,
+	MONITOR_OVLD_BUF_POOL_PAGE_TOTAL,
+	MONITOR_OVLD_BUF_POOL_PAGE_MISC,
+	MONITOR_OVLD_BUF_POOL_PAGES_DATA,
+	MONITOR_OVLD_BUF_POOL_BYTES_DATA,
+	MONITOR_OVLD_BUF_POOL_PAGES_DIRTY,
+	MONITOR_OVLD_BUF_POOL_BYTES_DIRTY,
+	MONITOR_OVLD_BUF_POOL_PAGES_FREE,
+	MONITOR_OVLD_PAGE_CREATED,
+	MONITOR_OVLD_PAGES_WRITTEN,
+	MONITOR_OVLD_PAGES_READ,
+	MONITOR_OVLD_BYTE_READ,
+	MONITOR_OVLD_BYTE_WRITTEN,
+	MONITOR_FLUSH_BATCH_SCANNED,
+	MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL,
+	MONITOR_FLUSH_BATCH_SCANNED_PER_CALL,
+	MONITOR_FLUSH_HP_RESCAN,
+	MONITOR_FLUSH_BATCH_TOTAL_PAGE,
+	MONITOR_FLUSH_BATCH_COUNT,
+	MONITOR_FLUSH_BATCH_PAGES,
+	MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE,
+	MONITOR_FLUSH_NEIGHBOR_COUNT,
+	MONITOR_FLUSH_NEIGHBOR_PAGES,
+	MONITOR_FLUSH_N_TO_FLUSH_REQUESTED,
+	MONITOR_FLUSH_AVG_PAGE_RATE,
+	MONITOR_FLUSH_LSN_AVG_RATE,
+	MONITOR_FLUSH_PCT_FOR_DIRTY,
+	MONITOR_FLUSH_PCT_FOR_LSN,
+	MONITOR_FLUSH_SYNC_WAITS,
+	MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE,
+	MONITOR_FLUSH_ADAPTIVE_COUNT,
+	MONITOR_FLUSH_ADAPTIVE_PAGES,
+	MONITOR_FLUSH_SYNC_TOTAL_PAGE,
+	MONITOR_FLUSH_SYNC_COUNT,
+	MONITOR_FLUSH_SYNC_PAGES,
+	MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE,
+	MONITOR_FLUSH_BACKGROUND_COUNT,
+	MONITOR_FLUSH_BACKGROUND_PAGES,
+	MONITOR_LRU_BATCH_SCANNED,
+	MONITOR_LRU_BATCH_SCANNED_NUM_CALL,
+	MONITOR_LRU_BATCH_SCANNED_PER_CALL,
+	MONITOR_LRU_BATCH_TOTAL_PAGE,
+	MONITOR_LRU_BATCH_COUNT,
+	MONITOR_LRU_BATCH_PAGES,
+	MONITOR_LRU_SINGLE_FLUSH_SCANNED,
+	MONITOR_LRU_SINGLE_FLUSH_SCANNED_NUM_CALL,
+	MONITOR_LRU_SINGLE_FLUSH_SCANNED_PER_CALL,
+	MONITOR_LRU_SINGLE_FLUSH_FAILURE_COUNT,
+	MONITOR_LRU_GET_FREE_SEARCH,
+	MONITOR_LRU_SEARCH_SCANNED,
+	MONITOR_LRU_SEARCH_SCANNED_NUM_CALL,
+	MONITOR_LRU_SEARCH_SCANNED_PER_CALL,
+	MONITOR_LRU_UNZIP_SEARCH_SCANNED,
+	MONITOR_LRU_UNZIP_SEARCH_SCANNED_NUM_CALL,
+	MONITOR_LRU_UNZIP_SEARCH_SCANNED_PER_CALL,
+
+	/* Buffer Page I/O specific counters. */
+	MONITOR_MODULE_BUF_PAGE,
+	MONITOR_INDEX_LEAF_PAGE_READ,
+	MONITOR_INDEX_NON_LEAF_PAGE_READ,
+	MONITOR_INDEX_IBUF_LEAF_PAGE_READ,
+	MONITOR_INDEX_IBUF_NON_LEAF_PAGE_READ,
+	MONITOR_UNDO_LOG_PAGE_READ,
+	MONITOR_INODE_PAGE_READ,
+	MONITOR_IBUF_FREELIST_PAGE_READ,
+	MONITOR_IBUF_BITMAP_PAGE_READ,
+	MONITOR_SYSTEM_PAGE_READ,
+	MONITOR_TRX_SYSTEM_PAGE_READ,
+	MONITOR_FSP_HDR_PAGE_READ,
+	MONITOR_XDES_PAGE_READ,
+	MONITOR_BLOB_PAGE_READ,
+	MONITOR_ZBLOB_PAGE_READ,
+	MONITOR_ZBLOB2_PAGE_READ,
+	MONITOR_OTHER_PAGE_READ,
+	MONITOR_INDEX_LEAF_PAGE_WRITTEN,
+	MONITOR_INDEX_NON_LEAF_PAGE_WRITTEN,
+	MONITOR_INDEX_IBUF_LEAF_PAGE_WRITTEN,
+	MONITOR_INDEX_IBUF_NON_LEAF_PAGE_WRITTEN,
+	MONITOR_UNDO_LOG_PAGE_WRITTEN,
+	MONITOR_INODE_PAGE_WRITTEN,
+	MONITOR_IBUF_FREELIST_PAGE_WRITTEN,
+	MONITOR_IBUF_BITMAP_PAGE_WRITTEN,
+	MONITOR_SYSTEM_PAGE_WRITTEN,
+	MONITOR_TRX_SYSTEM_PAGE_WRITTEN,
+	MONITOR_FSP_HDR_PAGE_WRITTEN,
+	MONITOR_XDES_PAGE_WRITTEN,
+	MONITOR_BLOB_PAGE_WRITTEN,
+	MONITOR_ZBLOB_PAGE_WRITTEN,
+	MONITOR_ZBLOB2_PAGE_WRITTEN,
+	MONITOR_OTHER_PAGE_WRITTEN,
+
+	/* OS level counters (I/O) */
+	MONITOR_MODULE_OS,
+	MONITOR_OVLD_OS_FILE_READ,
+	MONITOR_OVLD_OS_FILE_WRITE,
+	MONITOR_OVLD_OS_FSYNC,
+	MONITOR_OS_PENDING_READS,
+	MONITOR_OS_PENDING_WRITES,
+	MONITOR_OVLD_OS_LOG_WRITTEN,
+	MONITOR_OVLD_OS_LOG_FSYNC,
+	MONITOR_OVLD_OS_LOG_PENDING_FSYNC,
+	MONITOR_OVLD_OS_LOG_PENDING_WRITES,
+
+	/* Transaction related counters */
+	MONITOR_MODULE_TRX,
+	MONITOR_TRX_RW_COMMIT,
+	MONITOR_TRX_RO_COMMIT,
+	MONITOR_TRX_NL_RO_COMMIT,
+	MONITOR_TRX_COMMIT_UNDO,
+	MONITOR_TRX_ROLLBACK,
+	MONITOR_TRX_ROLLBACK_SAVEPOINT,
+	MONITOR_TRX_ROLLBACK_ACTIVE,
+	MONITOR_TRX_ACTIVE,
+	MONITOR_RSEG_HISTORY_LEN,
+	MONITOR_NUM_UNDO_SLOT_USED,
+	MONITOR_NUM_UNDO_SLOT_CACHED,
+	MONITOR_RSEG_CUR_SIZE,
+
+	/* Purge related counters */
+	MONITOR_MODULE_PURGE,
+	MONITOR_N_DEL_ROW_PURGE,
+	MONITOR_N_UPD_EXIST_EXTERN,
+	MONITOR_PURGE_INVOKED,
+	MONITOR_PURGE_N_PAGE_HANDLED,
+	MONITOR_DML_PURGE_DELAY,
+	MONITOR_PURGE_STOP_COUNT,
+	MONITOR_PURGE_RESUME_COUNT,
+
+	/* Recovery related counters */
+	MONITOR_MODULE_RECOVERY,
+	MONITOR_NUM_CHECKPOINT,
+	MONITOR_OVLD_LSN_FLUSHDISK,
+	MONITOR_OVLD_LSN_CHECKPOINT,
+	MONITOR_OVLD_LSN_CURRENT,
+	MONITOR_LSN_CHECKPOINT_AGE,
+	MONITOR_OVLD_BUF_OLDEST_LSN,
+	MONITOR_OVLD_MAX_AGE_ASYNC,
+	MONITOR_OVLD_MAX_AGE_SYNC,
+	MONITOR_PENDING_LOG_WRITE,
+	MONITOR_PENDING_CHECKPOINT_WRITE,
+	MONITOR_LOG_IO,
+	MONITOR_OVLD_LOG_WAITS,
+	MONITOR_OVLD_LOG_WRITE_REQUEST,
+	MONITOR_OVLD_LOG_WRITES,
+
+	/* Page Manager related counters */
+	MONITOR_MODULE_PAGE,
+	MONITOR_PAGE_COMPRESS,
+	MONITOR_PAGE_DECOMPRESS,
+	MONITOR_PAD_INCREMENTS,
+	MONITOR_PAD_DECREMENTS,
+
+	/* Index related counters */
+	MONITOR_MODULE_INDEX,
+	MONITOR_INDEX_SPLIT,
+	MONITOR_INDEX_MERGE_ATTEMPTS,
+	MONITOR_INDEX_MERGE_SUCCESSFUL,
+	MONITOR_INDEX_REORG_ATTEMPTS,
+	MONITOR_INDEX_REORG_SUCCESSFUL,
+	MONITOR_INDEX_DISCARD,
+
+	/* Adaptive Hash Index related counters */
+	MONITOR_MODULE_ADAPTIVE_HASH,
+	MONITOR_OVLD_ADAPTIVE_HASH_SEARCH,
+	MONITOR_OVLD_ADAPTIVE_HASH_SEARCH_BTREE,
+	MONITOR_ADAPTIVE_HASH_PAGE_ADDED,
+	MONITOR_ADAPTIVE_HASH_PAGE_REMOVED,
+	MONITOR_ADAPTIVE_HASH_ROW_ADDED,
+	MONITOR_ADAPTIVE_HASH_ROW_REMOVED,
+	MONITOR_ADAPTIVE_HASH_ROW_REMOVE_NOT_FOUND,
+	MONITOR_ADAPTIVE_HASH_ROW_UPDATED,
+
+	/* Tablespace related counters */
+	MONITOR_MODULE_FIL_SYSTEM,
+	MONITOR_OVLD_N_FILE_OPENED,
+
+	/* InnoDB Change Buffer related counters */
+	MONITOR_MODULE_IBUF_SYSTEM,
+	MONITOR_OVLD_IBUF_MERGE_INSERT,
+	MONITOR_OVLD_IBUF_MERGE_DELETE,
+	MONITOR_OVLD_IBUF_MERGE_PURGE,
+	MONITOR_OVLD_IBUF_MERGE_DISCARD_INSERT,
+	MONITOR_OVLD_IBUF_MERGE_DISCARD_DELETE,
+	MONITOR_OVLD_IBUF_MERGE_DISCARD_PURGE,
+	MONITOR_OVLD_IBUF_MERGES,
+	MONITOR_OVLD_IBUF_SIZE,
+
+	/* Counters for server operations */
+	MONITOR_MODULE_SERVER,
+	MONITOR_MASTER_THREAD_SLEEP,
+	MONITOR_OVLD_SERVER_ACTIVITY,
+	MONITOR_MASTER_ACTIVE_LOOPS,
+	MONITOR_MASTER_IDLE_LOOPS,
+	MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND,
+	MONITOR_SRV_IBUF_MERGE_MICROSECOND,
+	MONITOR_SRV_LOG_FLUSH_MICROSECOND,
+	MONITOR_SRV_MEM_VALIDATE_MICROSECOND,
+	MONITOR_SRV_PURGE_MICROSECOND,
+	MONITOR_SRV_DICT_LRU_MICROSECOND,
+	MONITOR_SRV_CHECKPOINT_MICROSECOND,
+	MONITOR_OVLD_SRV_DBLWR_WRITES,
+	MONITOR_OVLD_SRV_DBLWR_PAGES_WRITTEN,
+	MONITOR_OVLD_SRV_PAGE_SIZE,
+	MONITOR_OVLD_RWLOCK_S_SPIN_WAITS,
+	MONITOR_OVLD_RWLOCK_X_SPIN_WAITS,
+	MONITOR_OVLD_RWLOCK_S_SPIN_ROUNDS,
+	MONITOR_OVLD_RWLOCK_X_SPIN_ROUNDS,
+	MONITOR_OVLD_RWLOCK_S_OS_WAITS,
+	MONITOR_OVLD_RWLOCK_X_OS_WAITS,
+
+	/* Data DML related counters */
+	MONITOR_MODULE_DML_STATS,
+	MONITOR_OLVD_ROW_READ,
+	MONITOR_OLVD_ROW_INSERTED,
+	MONITOR_OLVD_ROW_DELETED,
+	MONITOR_OLVD_ROW_UPDTATED,
+
+	/* Data DDL related counters */
+	MONITOR_MODULE_DDL_STATS,
+	MONITOR_BACKGROUND_DROP_INDEX,
+	MONITOR_BACKGROUND_DROP_TABLE,
+	MONITOR_ONLINE_CREATE_INDEX,
+	MONITOR_PENDING_ALTER_TABLE,
+
+	MONITOR_MODULE_ICP,
+	MONITOR_ICP_ATTEMPTS,
+	MONITOR_ICP_NO_MATCH,
+	MONITOR_ICP_OUT_OF_RANGE,
+	MONITOR_ICP_MATCH,
+
+	/* This is used only for control system to turn
+	on/off and reset all monitor counters */
+	MONITOR_ALL_COUNTER,
+
+	/* This must be the last member */
+	NUM_MONITOR
+};
+
+/** This informs the monitor control system to turn
+on/off and reset monitor counters through wild card match */
+#define	MONITOR_WILDCARD_MATCH		(NUM_MONITOR + 1)
+
+/** Cannot find monitor counter with a specified name */
+#define	MONITOR_NO_MATCH		(NUM_MONITOR + 2)
+
+/** struct monitor_info describes the basic/static information
+about each monitor counter. */
+struct monitor_info_t {
+	const char*	monitor_name;	/*!< Monitor name */
+	const char*	monitor_module;	/*!< Sub Module the monitor
+					belongs to */
+	const char*	monitor_desc;	/*!< Brief desc of monitor counter */
+	monitor_type_t	monitor_type;	/*!< Type of Monitor Info */
+	monitor_id_t	monitor_related_id;/*!< Monitor ID of counter that
+					related to this monitor. This is
+					set when the monitor belongs to
+					a "monitor set" */
+	monitor_id_t	monitor_id;	/*!< Monitor ID as defined in enum
+					monitor_id_t */
+};
+
+/** Following are the "set_option" values allowed for
+srv_mon_set_module_control() and srv_mon_process_existing_counter()
+functions, to turn on/off/reset the monitor counters. */
+enum mon_option_t {
+	MONITOR_TURN_ON = 1,		/*!< Turn on the counter */
+	MONITOR_TURN_OFF,		/*!< Turn off the counter */
+	MONITOR_RESET_VALUE,		/*!< Reset current values */
+	MONITOR_RESET_ALL_VALUE,	/*!< Reset all values */
+	MONITOR_GET_VALUE		/*!< Option for
+					srv_mon_process_existing_counter()
+					function */
+};
+
+/** Number of bits in a ulint datatype */
+#define	NUM_BITS_ULINT	(sizeof(ulint) * CHAR_BIT)
+
+/** This "monitor_set_tbl" is a bitmap that records whether a particular
+monitor counter has been turned on or off. It is sized to hold at least
+NUM_MONITOR bits, rounded up to a whole number of ulint words. */
+extern ulint		monitor_set_tbl[(NUM_MONITOR + NUM_BITS_ULINT - 1) /
+					NUM_BITS_ULINT];
+
+/** Macros to turn on/off the control bit in monitor_set_tbl for a monitor
+counter option. The word index is monitor / NUM_BITS_ULINT and the bit
+within the word is monitor % NUM_BITS_ULINT. The argument is parenthesized
+in the expansion so that an expression argument (for example id + 1) is
+divided as a whole; it is still evaluated twice, so it must not have
+side effects.
+@param monitor	monitor_id_t value identifying the counter */
+#define MONITOR_ON(monitor)					\
+	(monitor_set_tbl[(monitor) / NUM_BITS_ULINT] |=		\
+			((ulint)1 << ((monitor) % NUM_BITS_ULINT)))
+
+#define MONITOR_OFF(monitor)					\
+	(monitor_set_tbl[(monitor) / NUM_BITS_ULINT] &=		\
+			~((ulint)1 << ((monitor) % NUM_BITS_ULINT)))
+
+/** Check whether the requested monitor is turned on/off. The result is
+the masked word from monitor_set_tbl: non-zero when the bit is set, zero
+otherwise (it is not normalized to 1). The argument is parenthesized in
+the expansion so that an expression argument is divided as a whole; it is
+evaluated twice, so it must not have side effects.
+@param monitor	monitor_id_t value identifying the counter */
+#define MONITOR_IS_ON(monitor)					\
+	(monitor_set_tbl[(monitor) / NUM_BITS_ULINT] &		\
+			((ulint)1 << ((monitor) % NUM_BITS_ULINT)))
+
+/** The actual monitor counter array that records each monitor counter
+value; it is indexed by monitor_id_t */
+extern monitor_value_t	 innodb_counter_value[NUM_MONITOR];
+
+/** Following are macro defines for basic monitor counter manipulations.
+Please note we do not provide any synchronization for these monitor
+operations due to performance consideration. Most counters can
+be placed under existing mutex protections in respective code
+module. */
+
+/** Macros to access various fields of a monitor counters */
+#define MONITOR_FIELD(monitor, field)			\
+		(innodb_counter_value[monitor].field)
+
+#define MONITOR_VALUE(monitor)				\
+		MONITOR_FIELD(monitor, mon_value)
+
+#define MONITOR_MAX_VALUE(monitor)			\
+		MONITOR_FIELD(monitor, mon_max_value)
+
+#define MONITOR_MIN_VALUE(monitor)			\
+		MONITOR_FIELD(monitor, mon_min_value)
+
+#define MONITOR_VALUE_RESET(monitor)			\
+		MONITOR_FIELD(monitor, mon_value_reset)
+
+#define MONITOR_MAX_VALUE_START(monitor)		\
+		MONITOR_FIELD(monitor, mon_max_value_start)
+
+#define MONITOR_MIN_VALUE_START(monitor)		\
+		MONITOR_FIELD(monitor, mon_min_value_start)
+
+#define MONITOR_LAST_VALUE(monitor)			\
+		MONITOR_FIELD(monitor, mon_last_value)
+
+#define MONITOR_START_VALUE(monitor)			\
+		MONITOR_FIELD(monitor, mon_start_value)
+
+#define MONITOR_VALUE_SINCE_START(monitor)		\
+		(MONITOR_VALUE(monitor) + MONITOR_VALUE_RESET(monitor))
+
+#define MONITOR_STATUS(monitor)				\
+		MONITOR_FIELD(monitor, mon_status)
+
+#define MONITOR_SET_START(monitor)					\
+	do {								\
+		MONITOR_STATUS(monitor) = MONITOR_STARTED;		\
+		MONITOR_FIELD((monitor), mon_start_time) = time(NULL);	\
+	} while (0)
+
+#define MONITOR_SET_OFF(monitor)					\
+	do {								\
+		MONITOR_STATUS(monitor) = MONITOR_STOPPED;		\
+		MONITOR_FIELD((monitor), mon_stop_time) = time(NULL);	\
+	} while (0)
+
+#define	MONITOR_INIT_ZERO_VALUE		0
+
+/** Max and min values are initialized when we first turn on the monitor
+counter, and set the MONITOR_STATUS. */
+#define MONITOR_MAX_MIN_NOT_INIT(monitor)				\
+		(MONITOR_STATUS(monitor) == MONITOR_INIT_ZERO_VALUE	\
+		 && MONITOR_MIN_VALUE(monitor) == MONITOR_INIT_ZERO_VALUE \
+		 && MONITOR_MAX_VALUE(monitor) == MONITOR_INIT_ZERO_VALUE)
+
+#define MONITOR_INIT(monitor)						\
+	if (MONITOR_MAX_MIN_NOT_INIT(monitor)) {			\
+		MONITOR_MIN_VALUE(monitor) = MIN_RESERVED;		\
+		MONITOR_MIN_VALUE_START(monitor) = MIN_RESERVED;	\
+		MONITOR_MAX_VALUE(monitor) = MAX_RESERVED;		\
+		MONITOR_MAX_VALUE_START(monitor) = MAX_RESERVED;	\
+	}
+
+/** Counter increment/decrement macros. They take no lock of their own:
+the caller is expected to already hold whatever synchronization protects
+the counter, so no additional mutex is needed here. */
+/* Add 1 to the counter if it is switched on, and raise the recorded
+maximum when the new value exceeds it. */
+#define	MONITOR_INC(monitor)						\
+	if (MONITOR_IS_ON(monitor)) {					\
+		if (++MONITOR_VALUE(monitor)				\
+		    > MONITOR_MAX_VALUE(monitor)) {			\
+			MONITOR_MAX_VALUE(monitor) =			\
+				MONITOR_VALUE(monitor);			\
+		}							\
+	}
+
+/** Increment a monitor counter under mutex protection.
+Use MONITOR_INC if appropriate mutex protection already exists.
+The on/off bit is tested before the mutex is acquired. The expansion is
+two statements (a ut_ad() check that the caller does not already own the
+mutex, followed by an if block), so it must not be used as the unbraced
+body of an if/else or loop.
+@param mutex	mutex to acquire and release
+@param monitor	monitor to be incremented by 1 */
+# define MONITOR_MUTEX_INC(mutex, monitor)				\
+	ut_ad(!mutex_own(mutex));					\
+	if (MONITOR_IS_ON(monitor)) {					\
+		mutex_enter(mutex);					\
+		if (++MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \
+			MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor); \
+		}							\
+		mutex_exit(mutex);					\
+	}
+/** Decrement a monitor counter under mutex protection.
+Use MONITOR_DEC if appropriate mutex protection already exists.
+The on/off bit is tested before the mutex is acquired. The expansion is
+two statements (a ut_ad() check that the caller does not already own the
+mutex, followed by an if block), so it must not be used as the unbraced
+body of an if/else or loop.
+@param mutex	mutex to acquire and release
+@param monitor	monitor to be decremented by 1 */
+# define MONITOR_MUTEX_DEC(mutex, monitor)				\
+	ut_ad(!mutex_own(mutex));					\
+	if (MONITOR_IS_ON(monitor)) {					\
+		mutex_enter(mutex);					\
+		if (--MONITOR_VALUE(monitor) < MONITOR_MIN_VALUE(monitor)) { \
+			MONITOR_MIN_VALUE(monitor) = MONITOR_VALUE(monitor); \
+		}							\
+		mutex_exit(mutex);					\
+	}
+
+#if defined HAVE_ATOMIC_BUILTINS_64
+/** Atomically increment a monitor counter.
+Use MONITOR_INC if appropriate mutex protection exists.
+The new value is compared with the recorded maximum as a signed
+mon_type_t, the same way MONITOR_INC compares it. Comparing as unsigned
+would turn a negative maximum, such as the initial MAX_RESERVED, into a
+huge number and the maximum would then never be updated.
+@param monitor	monitor to be incremented by 1 */
+# define MONITOR_ATOMIC_INC(monitor)					\
+	if (MONITOR_IS_ON(monitor)) {					\
+		ib_uint64_t	value;					\
+		value  = os_atomic_increment_uint64(			\
+			(ib_uint64_t*) &MONITOR_VALUE(monitor),	 1);	\
+		/* Note: This is not 100% accurate because of the	\
+		inherent race, we ignore it due to performance. */	\
+		if ((mon_type_t) value > MONITOR_MAX_VALUE(monitor)) {	\
+			MONITOR_MAX_VALUE(monitor) =			\
+				(mon_type_t) value;			\
+		}							\
+	}
+
+/** Atomically decrement a monitor counter.
+Use MONITOR_DEC if appropriate mutex protection exists.
+The new value is compared with the recorded minimum as a signed
+mon_type_t, the same way MONITOR_DEC compares it. Comparing as unsigned
+would make a counter that drops below zero look like a huge number and
+the minimum would then not be lowered.
+@param monitor	monitor to be decremented by 1 */
+# define MONITOR_ATOMIC_DEC(monitor)					\
+	if (MONITOR_IS_ON(monitor)) {					\
+		ib_uint64_t	value;					\
+		value = os_atomic_decrement_uint64(			\
+			(ib_uint64_t*) &MONITOR_VALUE(monitor), 1);	\
+		/* Note: This is not 100% accurate because of the	\
+		inherent race, we ignore it due to performance. */	\
+		if ((mon_type_t) value < MONITOR_MIN_VALUE(monitor)) {	\
+			MONITOR_MIN_VALUE(monitor) =			\
+				(mon_type_t) value;			\
+		}							\
+	}
+# define srv_mon_create() ((void) 0)
+# define srv_mon_free() ((void) 0)
+#else /* HAVE_ATOMIC_BUILTINS_64 */
+/** Mutex protecting atomic operations on platforms that lack
+built-in operations for atomic memory access */
+extern ib_mutex_t	monitor_mutex;
+/****************************************************************//**
+Initialize the monitor subsystem. */
+UNIV_INTERN
+void
+srv_mon_create(void);
+/*================*/
+/****************************************************************//**
+Close the monitor subsystem. */
+UNIV_INTERN
+void
+srv_mon_free(void);
+/*==============*/
+
+/** Atomically increment a monitor counter.
+Use MONITOR_INC if appropriate mutex protection exists.
+@param monitor	monitor to be incremented by 1 */
+# define MONITOR_ATOMIC_INC(monitor) MONITOR_MUTEX_INC(&monitor_mutex, monitor)
+/** Atomically decrement a monitor counter.
+Use MONITOR_DEC if appropriate mutex protection exists.
+@param monitor	monitor to be decremented by 1 */
+# define MONITOR_ATOMIC_DEC(monitor) MONITOR_MUTEX_DEC(&monitor_mutex, monitor)
+#endif /* HAVE_ATOMIC_BUILTINS_64 */
+
+/* Subtract 1 from the counter if it is switched on, and lower the
+recorded minimum when the new value falls below it. No locking is done
+here. */
+#define	MONITOR_DEC(monitor)						\
+	if (MONITOR_IS_ON(monitor)) {					\
+		if (--MONITOR_VALUE(monitor)				\
+		    < MONITOR_MIN_VALUE(monitor)) {			\
+			MONITOR_MIN_VALUE(monitor) =			\
+				MONITOR_VALUE(monitor);			\
+		}							\
+	}
+
+#ifdef UNIV_DEBUG_VALGRIND
+# define MONITOR_CHECK_DEFINED(value) do {	\
+	mon_type_t m = value;			\
+	UNIV_MEM_ASSERT_RW(&m, sizeof m);	\
+} while (0)
+#else /* UNIV_DEBUG_VALGRIND */
+# define MONITOR_CHECK_DEFINED(value) (void) 0
+#endif /* UNIV_DEBUG_VALGRIND */
+
+#define	MONITOR_INC_VALUE(monitor, value)				\
+	MONITOR_CHECK_DEFINED(value);					\
+	if (MONITOR_IS_ON(monitor)) {					\
+		MONITOR_VALUE(monitor) += (mon_type_t) (value);		\
+		if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) {  \
+			MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor);\
+		}							\
+	}
+
+/* Subtract "value" from the counter if it is switched on, and lower the
+recorded minimum when the new value falls below it. A ut_ad() check
+asserts that the counter is at least "value" before the subtraction.
+The expansion starts with a separate MONITOR_CHECK_DEFINED statement, so
+it must not be used as the unbraced body of an if/else or loop.
+@param monitor	monitor to be decremented
+@param value	amount to subtract from the counter */
+#define	MONITOR_DEC_VALUE(monitor, value)				\
+	MONITOR_CHECK_DEFINED(value);					\
+	if (MONITOR_IS_ON(monitor)) {					\
+		ut_ad(MONITOR_VALUE(monitor) >= (mon_type_t) (value));	\
+		MONITOR_VALUE(monitor) -= (mon_type_t) (value);		\
+		if (MONITOR_VALUE(monitor) < MONITOR_MIN_VALUE(monitor)) {  \
+			MONITOR_MIN_VALUE(monitor) = MONITOR_VALUE(monitor);\
+		}							\
+	}
+
+/* Increment/decrement counter without checking the monitor on/off bit,
+which could already be checked as a module group */
+/* Add 1 to the counter unconditionally and raise the recorded maximum
+when the new value exceeds it. The definition ends at "while (0)" with no
+line continuation, so the line that follows is never spliced into the
+macro. */
+#define	MONITOR_INC_NOCHECK(monitor)					\
+	do {								\
+		MONITOR_VALUE(monitor)++;				\
+		if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) {  \
+			MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor);\
+		}							\
+	} while (0)
+
+/* Subtract 1 from the counter without looking at its on/off bit, and
+lower the recorded minimum when the new value falls below it. */
+#define	MONITOR_DEC_NOCHECK(monitor)					\
+	do {								\
+		if (--MONITOR_VALUE(monitor)				\
+		    < MONITOR_MIN_VALUE(monitor)) {			\
+			MONITOR_MIN_VALUE(monitor) =			\
+				MONITOR_VALUE(monitor);			\
+		}							\
+	} while (0)
+
+/** Directly set a monitor counter's value */
+#define	MONITOR_SET(monitor, value)					\
+	MONITOR_CHECK_DEFINED(value);					\
+	if (MONITOR_IS_ON(monitor)) {					\
+		MONITOR_VALUE(monitor) = (mon_type_t) (value);		\
+		if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) {  \
+			MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor);\
+		}							\
+		if (MONITOR_VALUE(monitor) < MONITOR_MIN_VALUE(monitor)) {  \
+			MONITOR_MIN_VALUE(monitor) = MONITOR_VALUE(monitor);\
+		}							\
+	}
+
+/** Add the time difference between now and input "value" to the monitor
+counter. The current time is read with ut_time_us(), so "value" is
+expected to be an earlier ut_time_us() reading (microseconds, going by
+the macro and function names), not a time in seconds.
+When the monitor is on, "value" is overwritten with the current reading,
+so it must be a modifiable lvalue. The max/min values of the counter are
+not updated by this macro.
+@param monitor	monitor to update for the time difference
+@param value	in/out: the start time value; set to the current time */
+#define	MONITOR_INC_TIME_IN_MICRO_SECS(monitor, value)			\
+	MONITOR_CHECK_DEFINED(value);					\
+	if (MONITOR_IS_ON(monitor)) {					\
+		ullint	old_time = (value);				\
+		value = ut_time_us(NULL);				\
+		MONITOR_VALUE(monitor) += (mon_type_t) (value - old_time);\
+	}
+
+/** This macro updates 3 counters in one call. However, it only checks the
+main/first monitor counter 'monitor', to see it is on or off to decide
+whether to do the update.
+@param monitor		the main monitor counter to update. It accounts for
+			the accumulative value for the counter.
+@param monitor_n_calls	counter that counts number of times this macro is
+			called
+@param monitor_per_call	counter that records the current and max value of
+			each incremental value
+@param value		incremental value to record this time */
+#define MONITOR_INC_VALUE_CUMULATIVE(					\
+		monitor, monitor_n_calls, monitor_per_call, value)	\
+	MONITOR_CHECK_DEFINED(value);					\
+	if (MONITOR_IS_ON(monitor)) {					\
+		MONITOR_VALUE(monitor_n_calls)++;			\
+		MONITOR_VALUE(monitor_per_call) = (mon_type_t) (value);	\
+		if (MONITOR_VALUE(monitor_per_call)			\
+		    > MONITOR_MAX_VALUE(monitor_per_call)) {		\
+			MONITOR_MAX_VALUE(monitor_per_call) =		\
+				 (mon_type_t) (value);			\
+		}							\
+		MONITOR_VALUE(monitor) += (mon_type_t) (value);		\
+		if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) {  \
+			MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor);\
+		}							\
+	}
+
+/** Directly set a monitor counter's value. Meant for monotonically increasing
+values: only the max value is updated, the min value is left untouched */
+#define	MONITOR_SET_UPD_MAX_ONLY(monitor, value)			\
+	MONITOR_CHECK_DEFINED(value);					\
+	if (MONITOR_IS_ON(monitor)) {					\
+		MONITOR_VALUE(monitor) = (mon_type_t) (value);		\
+		if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) {  \
+			MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor);\
+		}							\
+	}
+
+/** Some values such as log sequence number are monotonically increasing
+numbers and do not need max/min values recorded; only the value is set */
+#define MONITOR_SET_SIMPLE(monitor, value)				\
+	MONITOR_CHECK_DEFINED(value);					\
+	if (MONITOR_IS_ON(monitor)) {					\
+		MONITOR_VALUE(monitor) = (mon_type_t) (value);		\
+	}
+
+/** Reset a monitor's values and times to MONITOR_INIT_ZERO_VALUE and its
+max/min to MAX_/MIN_RESERVED. The caller checks that the counter is off */
+#define MONITOR_RESET_ALL(monitor)					\
+	do {								\
+		MONITOR_VALUE(monitor) = MONITOR_INIT_ZERO_VALUE;	\
+		MONITOR_MAX_VALUE(monitor) = MAX_RESERVED;		\
+		MONITOR_MIN_VALUE(monitor) = MIN_RESERVED;		\
+		MONITOR_VALUE_RESET(monitor) = MONITOR_INIT_ZERO_VALUE;	\
+		MONITOR_MAX_VALUE_START(monitor) = MAX_RESERVED;	\
+		MONITOR_MIN_VALUE_START(monitor) = MIN_RESERVED;	\
+		MONITOR_LAST_VALUE(monitor) = MONITOR_INIT_ZERO_VALUE;	\
+		MONITOR_FIELD(monitor, mon_start_time) =		\
+					MONITOR_INIT_ZERO_VALUE;	\
+		MONITOR_FIELD(monitor, mon_stop_time) =			\
+					MONITOR_INIT_ZERO_VALUE;	\
+		MONITOR_FIELD(monitor, mon_reset_time) =		\
+					MONITOR_INIT_ZERO_VALUE;	\
+	} while (0)
+
+/** The following macros define the operations needed to fetch and
+consolidate information from existing system status variables. */
+
+/** Save the passed-in value, less the monitor's reset baseline
+(MONITOR_VALUE_RESET), to the mon_start_value field of the monitor counter */
+#define MONITOR_SAVE_START(monitor, value) do {				\
+	MONITOR_CHECK_DEFINED(value);					\
+	(MONITOR_START_VALUE(monitor) =					\
+		(mon_type_t) (value) - MONITOR_VALUE_RESET(monitor));	\
+	} while (0)
+
+/** Save the monitor's current value to its mon_last_value field and add
+that value to mon_start_value (no value is passed in) */
+#define MONITOR_SAVE_LAST(monitor)					\
+	do {								\
+		MONITOR_LAST_VALUE(monitor) = MONITOR_VALUE(monitor);	\
+		MONITOR_START_VALUE(monitor) += MONITOR_VALUE(monitor);	\
+	} while (0)
+
+/** Set the monitor value (and its max, via MONITOR_SET_UPD_MAX_ONLY) to
+(value - reset baseline - mon_start_value + mon_last_value). */
+#define MONITOR_SET_DIFF(monitor, value)				\
+	MONITOR_SET_UPD_MAX_ONLY(monitor, ((value)			\
+	- MONITOR_VALUE_RESET(monitor)					\
+	- MONITOR_FIELD(monitor, mon_start_value)			\
+	+ MONITOR_FIELD(monitor, mon_last_value)))
+
+/****************************************************************//**
+Get monitor's monitor_info_t by its monitor id (index into the
+innodb_counter_info array).
+@return	pointer to the corresponding monitor_info_t, or NULL if no such
+monitor */
+UNIV_INTERN
+monitor_info_t*
+srv_mon_get_info(
+/*=============*/
+	monitor_id_t	monitor_id);	/*!< id index into the
+					innodb_counter_info array */
+/****************************************************************//**
+Get monitor's name by its monitor id (index into the
+innodb_counter_info array).
+@return	corresponding monitor name, or NULL if no such
+monitor */
+UNIV_INTERN
+const char*
+srv_mon_get_name(
+/*=============*/
+	monitor_id_t	monitor_id);	/*!< id index into the
+					innodb_counter_info array */
+
+/****************************************************************//**
+Turn on/off/reset monitor counters in a module. If module_id
+is NUM_MONITOR then turn on all monitor counters.
+No value is returned: the function is void, so a monitor that is
+already turned on is not reported back to the caller. */
+UNIV_INTERN
+void
+srv_mon_set_module_control(
+/*=======================*/
+	monitor_id_t	module_id,	/*!< in: Module ID as in
+					monitor_counter_id. If it is
+					set to NUM_MONITOR, this means
+					we shall turn on all the counters */
+	mon_option_t	set_option);	/*!< in: Turn on/off reset the
+					counter */
+/****************************************************************//**
+This function consolidates some existing server counters used
+by "system status variables". These existing system variables do not have
+a mechanism to start/stop and reset the counters, so we simulate these
+controls by remembering the corresponding counter values when the
+corresponding monitors are turned on/off/reset, and do the appropriate
+arithmetic to derive the actual value. */
+UNIV_INTERN
+void
+srv_mon_process_existing_counter(
+/*=============================*/
+	monitor_id_t	monitor_id,	/*!< in: the monitor's ID as in
+					monitor_counter_id */
+	mon_option_t	set_option);	/*!< in: Turn on/off reset the
+					counter */
+/*************************************************************//**
+This function is used to calculate the maximum counter value
+since the start of monitor counter
+@return	max counter value since start. */
+UNIV_INLINE
+mon_type_t
+srv_mon_calc_max_since_start(
+/*=========================*/
+	monitor_id_t	monitor);	/*!< in: monitor id */
+/*************************************************************//**
+This function is used to calculate the minimum counter value
+since the start of monitor counter
+@return	min counter value since start. */
+UNIV_INLINE
+mon_type_t
+srv_mon_calc_min_since_start(
+/*=========================*/
+	monitor_id_t	monitor);	/*!< in: monitor id*/
+/*************************************************************//**
+Reset a monitor, create a new base line with the current monitor
+value. This baseline is recorded by MONITOR_VALUE_RESET(monitor) */
+UNIV_INTERN
+void
+srv_mon_reset(
+/*==========*/
+	monitor_id_t	monitor);	/*!< in: monitor id*/
+/*************************************************************//**
+This function resets all values of a monitor counter */
+UNIV_INLINE
+void
+srv_mon_reset_all(
+/*==============*/
+	monitor_id_t	monitor);	/*!< in: monitor id*/
+/*************************************************************//**
+Turn on monitor counters that are marked as default ON. */
+UNIV_INTERN
+void
+srv_mon_default_on(void);
+/*====================*/
+
+#ifndef UNIV_NONINL
+#include "srv0mon.ic"
+#endif
+#else /* !UNIV_HOTBACKUP */
+# define MONITOR_INC(x)		((void) 0)
+# define MONITOR_DEC(x)		((void) 0)
+#endif /* !UNIV_HOTBACKUP */
+
+#endif
diff --git a/storage/innobase/include/srv0mon.ic b/storage/innobase/include/srv0mon.ic
new file mode 100644
index 00000000000..225390c6b6f
--- /dev/null
+++ b/storage/innobase/include/srv0mon.ic
@@ -0,0 +1,113 @@
+/*****************************************************************************
+
+Copyright (c) 2010, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/srv0mon.ic
+Server monitoring system
+
+Created 1/20/2010	Jimmy Yang
+************************************************************************/
+
+/*************************************************************//**
+Computes the largest value a monitor counter has reached since the
+monitor was started, updating MONITOR_MAX_VALUE_START as a side effect.
+@return	max counter value since start. */
+UNIV_INLINE
+mon_type_t
+srv_mon_calc_max_since_start(
+/*=========================*/
+	monitor_id_t	monitor)	/*!< in: monitor id */
+{
+	if (MONITOR_MAX_VALUE_START(monitor) == MAX_RESERVED) {
+		/* Nothing recorded since start yet: seed the value
+		with the max seen since the last reset. */
+		MONITOR_MAX_VALUE_START(monitor) =
+				MONITOR_MAX_VALUE(monitor);
+
+		return(MONITOR_MAX_VALUE_START(monitor));
+	}
+
+	/* MONITOR_MAX_VALUE is relative to the last reset; add the
+	reset baseline back before comparing with the value kept
+	since start. An unset max (MAX_RESERVED) is skipped. */
+	if (MONITOR_MAX_VALUE(monitor) != MAX_RESERVED) {
+		if (MONITOR_MAX_VALUE(monitor)
+		    + MONITOR_VALUE_RESET(monitor)
+		    > MONITOR_MAX_VALUE_START(monitor)) {
+			MONITOR_MAX_VALUE_START(monitor) =
+				MONITOR_MAX_VALUE(monitor)
+				+ MONITOR_VALUE_RESET(monitor);
+		}
+	}
+
+	return(MONITOR_MAX_VALUE_START(monitor));
+}
+
+/*************************************************************//**
+Computes the smallest value a monitor counter has reached since the
+monitor was started, updating MONITOR_MIN_VALUE_START as a side effect.
+@return	min counter value since start. */
+UNIV_INLINE
+mon_type_t
+srv_mon_calc_min_since_start(
+/*=========================*/
+	monitor_id_t	monitor)	/*!< in: monitor id */
+{
+	if (MONITOR_MIN_VALUE_START(monitor) == MIN_RESERVED) {
+		/* Nothing recorded since start yet: seed the value
+		with the min seen since the last reset. */
+		MONITOR_MIN_VALUE_START(monitor) =
+				MONITOR_MIN_VALUE(monitor);
+
+		return(MONITOR_MIN_VALUE_START(monitor));
+	}
+
+	/* MONITOR_MIN_VALUE is relative to the last reset; add the
+	reset baseline back before comparing with the value kept
+	since start. An unset min (MIN_RESERVED) is skipped. */
+	if (MONITOR_MIN_VALUE(monitor) != MIN_RESERVED) {
+		if (MONITOR_MIN_VALUE(monitor)
+		    + MONITOR_VALUE_RESET(monitor)
+		    < MONITOR_MIN_VALUE_START(monitor)) {
+			MONITOR_MIN_VALUE_START(monitor) =
+				MONITOR_MIN_VALUE(monitor)
+				+ MONITOR_VALUE_RESET(monitor);
+		}
+	}
+
+	return(MONITOR_MIN_VALUE_START(monitor));
+}
+
+/*************************************************************//**
+Resets all values of a monitor counter, unless the monitor is on. */
+UNIV_INLINE
+void
+srv_mon_reset_all(
+/*==============*/
+	monitor_id_t	monitor)	/*!< in: monitor id */
+{
+	if (MONITOR_IS_ON(monitor)) {
+		/* A running monitor is left alone; only complain. */
+		fprintf(stderr, "InnoDB: Cannot reset all values for "
+			"monitor counter %s while it is on. Please "
+			"turn it off and retry. \n",
+			srv_mon_get_name(monitor));
+		return;
+	}
+	MONITOR_RESET_ALL(monitor);
+}
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
new file mode 100644
index 00000000000..7a6c9f93e3d
--- /dev/null
+++ b/storage/innobase/include/srv0srv.h
@@ -0,0 +1,888 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All rights reserved.
+Copyright (c) 2008, 2009, Google Inc.
+Copyright (c) 2009, Percona Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/srv0srv.h
+The server main program
+
+Created 10/10/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef srv0srv_h
+#define srv0srv_h
+
+#include "univ.i"
+#ifndef UNIV_HOTBACKUP
+#include "log0log.h"
+#include "sync0sync.h"
+#include "os0sync.h"
+#include "que0types.h"
+#include "trx0types.h"
+#include "srv0conc.h"
+#include "buf0checksum.h"
+#include "ut0counter.h"
+
+/** Global counters used inside InnoDB; the instance is srv_stats. */
+struct srv_stats_t {
+	typedef ib_counter_t<lsn_t, 1, single_indexer_t> lsn_ctr_1_t;
+	typedef ib_counter_t<ulint, 1, single_indexer_t> ulint_ctr_1_t;
+	typedef ib_counter_t<lint, 1, single_indexer_t> lint_ctr_1_t;
+	typedef ib_counter_t<ulint, 64> ulint_ctr_64_t;
+	typedef ib_counter_t<ib_int64_t, 1, single_indexer_t> ib_int64_ctr_1_t;
+
+	/** Amount of data written in total (in bytes) */
+	ulint_ctr_1_t		data_written;
+
+	/** Number of the log write requests done */
+	ulint_ctr_1_t		log_write_requests;
+
+	/** Number of physical writes to the log performed */
+	ulint_ctr_1_t		log_writes;
+
+	/** Amount of data written to the log files in bytes */
+	lsn_ctr_1_t		os_log_written;
+
+	/** Number of writes currently being done to the log files */
+	lint_ctr_1_t		os_log_pending_writes;
+
+	/** We increase this counter, when we don't have enough
+	space in the log buffer and have to flush it */
+	ulint_ctr_1_t		log_waits;
+
+	/** Count the number of times the doublewrite buffer was flushed */
+	ulint_ctr_1_t		dblwr_writes;
+
+	/** Store the number of pages that have been flushed to the
+	doublewrite buffer */
+	ulint_ctr_1_t		dblwr_pages_written;
+
+	/** Store the number of write requests issued */
+	ulint_ctr_1_t		buf_pool_write_requests;
+
+	/** Store the number of times when we had to wait for a free page
+	in the buffer pool. It happens when the buffer pool is full and we
+	need to make a flush, in order to be able to read or create a page. */
+	ulint_ctr_1_t		buf_pool_wait_free;
+
+	/** Count the number of pages that were written from buffer
+	pool to the disk */
+	ulint_ctr_1_t		buf_pool_flushed;
+
+	/** Number of buffer pool reads that led to the reading of
+	a disk page */
+	ulint_ctr_1_t		buf_pool_reads;
+
+	/** Amount of data read in total (in bytes) */
+	ulint_ctr_1_t		data_read;
+
+	/** Wait time of database locks */
+	ib_int64_ctr_1_t	n_lock_wait_time;
+
+	/** Number of database lock waits */
+	ulint_ctr_1_t		n_lock_wait_count;
+
+	/** Number of threads currently waiting on database locks */
+	lint_ctr_1_t		n_lock_wait_current_count;
+
+	/** Number of rows read. */
+	ulint_ctr_64_t		n_rows_read;
+
+	/** Number of rows updated */
+	ulint_ctr_64_t		n_rows_updated;
+
+	/** Number of rows deleted */
+	ulint_ctr_64_t		n_rows_deleted;
+
+	/** Number of rows inserted */
+	ulint_ctr_64_t		n_rows_inserted;
+};
+
+extern const char*	srv_main_thread_op_info;
+
+/** Prefix used by MySQL to indicate pre-5.1 table name encoding */
+extern const char	srv_mysql50_table_name_prefix[10];
+
+/* The monitor thread waits on this event. */
+extern os_event_t	srv_monitor_event;
+
+/* The error monitor thread waits on this event. */
+extern os_event_t	srv_error_event;
+
+/** The buffer pool dump/load thread waits on this event. */
+extern os_event_t	srv_buf_dump_event;
+
+/** The buffer pool dump/load file name */
+#define SRV_BUF_DUMP_FILENAME_DEFAULT	"ib_buffer_pool"
+extern char*		srv_buf_dump_filename;
+
+/** Boolean config knobs that tell InnoDB to dump the buffer pool at shutdown
+and/or load it during startup. */
+extern char		srv_buffer_pool_dump_at_shutdown;
+extern char		srv_buffer_pool_load_at_startup;
+
+/* Whether to disable file system cache if it is defined */
+extern char		srv_disable_sort_file_cache;
+
+/* If the last data file is auto-extended, we add this many pages to it
+at a time */
+#define SRV_AUTO_EXTEND_INCREMENT	\
+	(srv_auto_extend_increment * ((1024 * 1024) / UNIV_PAGE_SIZE))
+
+/* Mutex for locking srv_monitor_file. Not created if srv_read_only_mode */
+extern ib_mutex_t	srv_monitor_file_mutex;
+/* Temporary file for innodb monitor output */
+extern FILE*	srv_monitor_file;
+/* Mutex for locking srv_dict_tmpfile. Only created if !srv_read_only_mode.
+This mutex has a very high rank; threads reserving it should not
+be holding any InnoDB latches. */
+extern ib_mutex_t	srv_dict_tmpfile_mutex;
+/* Temporary file for output from the data dictionary */
+extern FILE*	srv_dict_tmpfile;
+/* Mutex for locking srv_misc_tmpfile. Only created if !srv_read_only_mode.
+This mutex has a very low rank; threads reserving it should not
+acquire any further latches or sleep before releasing this one. */
+extern ib_mutex_t	srv_misc_tmpfile_mutex;
+/* Temporary file for miscellaneous diagnostic output */
+extern FILE*	srv_misc_tmpfile;
+
+/* Server parameters which are read from the initfile */
+
+extern char*	srv_data_home;
+
+#ifdef UNIV_LOG_ARCHIVE
+extern char*	srv_arch_dir;
+#endif /* UNIV_LOG_ARCHIVE */
+
+/** Set if InnoDB must operate in read-only mode. We don't do any
+recovery and open all tables in RO mode instead of RW mode. We don't
+sync the max trx id to disk either. */
+extern my_bool	srv_read_only_mode;
+/** store to its own file each table created by a user; data
+dictionary tables are in the system tablespace 0 */
+extern my_bool	srv_file_per_table;
+/** Sleep delay for threads waiting to enter InnoDB. In micro-seconds. */
+extern	ulong	srv_thread_sleep_delay;
+#if defined(HAVE_ATOMIC_BUILTINS)
+/** Maximum sleep delay (in micro-seconds), value of 0 disables it.*/
+extern	ulong	srv_adaptive_max_sleep_delay;
+#endif /* HAVE_ATOMIC_BUILTINS */
+
+/** The file format to use on new *.ibd files. */
+extern ulint	srv_file_format;
+/** Whether to check file format during startup.  A value of
+UNIV_FORMAT_MAX + 1 means no checking ie. FALSE.  The default is to
+set it to the highest format we support. */
+extern ulint	srv_max_file_format_at_startup;
+/** Place locks to records only i.e. do not use next-key locking except
+on duplicate key checking and foreign key checking */
+extern ibool	srv_locks_unsafe_for_binlog;
+
+/** Sort buffer size in index creation */
+extern ulong	srv_sort_buf_size;
+/** Maximum modification log file size for online index creation */
+extern unsigned long long	srv_online_max_size;
+
+/* If this flag is TRUE, then we will use the native aio of the
+OS (provided we compiled Innobase with it in), otherwise we will
+use simulated aio we build below with threads.
+Currently we support native aio on windows and linux */
+extern my_bool	srv_use_native_aio;
+#ifdef __WIN__
+extern ibool	srv_use_native_conditions;
+#endif /* __WIN__ */
+#endif /* !UNIV_HOTBACKUP */
+
+/** Server undo tablespaces directory, can be absolute path. */
+extern char*	srv_undo_dir;
+
+/** Number of undo tablespaces to use. */
+extern ulong	srv_undo_tablespaces;
+
+/** The number of UNDO tablespaces that are open and ready to use. */
+extern ulint	srv_undo_tablespaces_open;
+
+/* The number of undo segments to use */
+extern ulong	srv_undo_logs;
+
+extern ulint	srv_n_data_files;
+extern char**	srv_data_file_names;
+extern ulint*	srv_data_file_sizes;
+extern ulint*	srv_data_file_is_raw_partition;
+
+extern ibool	srv_auto_extend_last_data_file;
+extern ulint	srv_last_file_size_max;
+extern char*	srv_log_group_home_dir;
+#ifndef UNIV_HOTBACKUP
+extern ulong	srv_auto_extend_increment;
+
+extern ibool	srv_created_new_raw;
+
+/** Maximum number of srv_n_log_files, or innodb_log_files_in_group */
+#define SRV_N_LOG_FILES_MAX 100
+extern ulong	srv_n_log_files;
+extern ib_uint64_t	srv_log_file_size;
+extern ib_uint64_t	srv_log_file_size_requested;
+extern ulint	srv_log_buffer_size;
+extern ulong	srv_flush_log_at_trx_commit;
+extern uint	srv_flush_log_at_timeout;
+extern char	srv_adaptive_flushing;
+
+/* If this flag is TRUE, then we will load the indexes' (and tables') metadata
+even if they are marked as "corrupted". Mostly it is for DBA to process
+corrupted index and table */
+extern my_bool	srv_load_corrupted;
+
+/* The sort order table of the MySQL latin1_swedish_ci character set
+collation */
+extern const byte*	srv_latin1_ordering;
+#ifndef UNIV_HOTBACKUP
+extern my_bool	srv_use_sys_malloc;
+#else
+extern ibool	srv_use_sys_malloc;
+#endif /* UNIV_HOTBACKUP */
+extern ulint	srv_buf_pool_size;	/*!< requested size in bytes */
+extern ulint    srv_buf_pool_instances; /*!< requested number of buffer pool instances */
+extern ulong	srv_n_page_hash_locks;	/*!< number of locks to
+					protect buf_pool->page_hash */
+extern ulong	srv_LRU_scan_depth;	/*!< Scan depth for LRU
+					flush batch */
+extern ulong	srv_flush_neighbors;	/*!< whether or not to flush
+					neighbors of a block */
+extern ulint	srv_buf_pool_old_size;	/*!< previously requested size */
+extern ulint	srv_buf_pool_curr_size;	/*!< current size in bytes */
+extern ulint	srv_mem_pool_size;
+extern ulint	srv_lock_table_size;
+
+extern ulint	srv_n_file_io_threads;
+extern my_bool	srv_random_read_ahead;
+extern ulong	srv_read_ahead_threshold;
+extern ulint	srv_n_read_io_threads;
+extern ulint	srv_n_write_io_threads;
+
+/* Number of IO operations per second the server can do */
+extern ulong    srv_io_capacity;
+
+/* We use this dummy default value at startup for max_io_capacity.
+The real value is set based on the value of io_capacity. */
+#define SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT	(~0UL)
+#define SRV_MAX_IO_CAPACITY_LIMIT		(~0UL)
+extern ulong    srv_max_io_capacity;
+/* Returns the number of IO operations that is X percent of the
+capacity. PCT_IO(5) -> returns the number of IO operations that
+is 5% of the max where max is srv_io_capacity.  */
+#define PCT_IO(p) ((ulong) (srv_io_capacity * ((double) (p) / 100.0)))
+
+/* The "innodb_stats_method" setting, decides how InnoDB is going
+to treat NULL value when collecting statistics. It is not defined
+as enum type because the configure option takes unsigned integer type. */
+extern ulong	srv_innodb_stats_method;
+
+#ifdef UNIV_LOG_ARCHIVE
+extern ibool		srv_log_archive_on;
+extern ibool		srv_archive_recovery;
+extern ib_uint64_t	srv_archive_recovery_limit_lsn;
+#endif /* UNIV_LOG_ARCHIVE */
+
+extern char*	srv_file_flush_method_str;
+extern ulint	srv_unix_file_flush_method;
+extern ulint	srv_win_file_flush_method;
+
+extern ulint	srv_max_n_open_files;
+
+extern ulong	srv_max_dirty_pages_pct;
+extern ulong	srv_max_dirty_pages_pct_lwm;
+
+extern ulong	srv_adaptive_flushing_lwm;
+extern ulong	srv_flushing_avg_loops;
+
+extern ulong	srv_force_recovery;
+#ifndef DBUG_OFF
+extern ulong	srv_force_recovery_crash;
+#endif /* !DBUG_OFF */
+
+extern ulint	srv_fast_shutdown;	/*!< If this is 1, do not do a
+					purge and index buffer merge.
+					If this is 2, do not even flush
+					the buffer pool to data files at
+					shutdown: we effectively 'crash'
+					InnoDB (but lose no committed
+					transactions). */
+extern ibool	srv_innodb_status;
+
+extern unsigned long long	srv_stats_transient_sample_pages;
+extern my_bool			srv_stats_persistent;
+extern unsigned long long	srv_stats_persistent_sample_pages;
+extern my_bool			srv_stats_auto_recalc;
+
+extern ibool	srv_use_doublewrite_buf;
+extern ulong	srv_doublewrite_batch_size;
+extern ulong	srv_checksum_algorithm;
+
+extern ulong	srv_max_buf_pool_modified_pct;
+extern ulong	srv_max_purge_lag;
+extern ulong	srv_max_purge_lag_delay;
+
+extern ulong	srv_replication_delay;
+/*-------------------------------------------*/
+
+extern my_bool	srv_print_innodb_monitor;
+extern my_bool	srv_print_innodb_lock_monitor;
+extern ibool	srv_print_innodb_tablespace_monitor;
+extern ibool	srv_print_verbose_log;
+#define DEPRECATED_MSG_INNODB_TABLE_MONITOR \
+	"Using innodb_table_monitor is deprecated and it may be removed " \
+	"in future releases. Please use the InnoDB INFORMATION_SCHEMA " \
+	"tables instead, see " REFMAN "innodb-i_s-tables.html"
+extern ibool	srv_print_innodb_table_monitor;
+
+extern ibool	srv_monitor_active;
+extern ibool	srv_error_monitor_active;
+
+/* TRUE during the lifetime of the buffer pool dump/load thread */
+extern ibool	srv_buf_dump_thread_active;
+
+/* TRUE during the lifetime of the stats thread */
+extern ibool	srv_dict_stats_thread_active;
+
+extern ulong	srv_n_spin_wait_rounds;
+extern ulong	srv_n_free_tickets_to_enter;
+extern ulong	srv_thread_sleep_delay;
+extern ulong	srv_spin_wait_delay;
+extern ibool	srv_priority_boost;
+
+extern ulint	srv_truncated_status_writes;
+extern ulint	srv_available_undo_logs;
+
+extern	ulint	srv_mem_pool_size;
+extern	ulint	srv_lock_table_size;
+
+#ifdef UNIV_DEBUG
+extern	ibool	srv_print_thread_releases;
+extern	ibool	srv_print_lock_waits;
+extern	ibool	srv_print_buf_io;
+extern	ibool	srv_print_log_io;
+extern	ibool	srv_print_latch_waits;
+#else /* UNIV_DEBUG */
+# define srv_print_thread_releases	FALSE
+# define srv_print_lock_waits		FALSE
+# define srv_print_buf_io		FALSE
+# define srv_print_log_io		FALSE
+# define srv_print_latch_waits		FALSE
+#endif /* UNIV_DEBUG */
+
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+extern my_bool	srv_ibuf_disable_background_merge;
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+
+#ifdef UNIV_DEBUG
+extern my_bool	srv_purge_view_update_only_debug;
+#endif /* UNIV_DEBUG */
+
+extern ulint	srv_fatal_semaphore_wait_threshold;
+#define SRV_SEMAPHORE_WAIT_EXTENSION	7200
+extern ulint	srv_dml_needed_delay;
+
+#ifndef HAVE_ATOMIC_BUILTINS
+/** Mutex protecting some server global variables. */
+extern ib_mutex_t	server_mutex;
+#endif /* !HAVE_ATOMIC_BUILTINS */
+
+#define SRV_MAX_N_IO_THREADS	130
+
+/* Array of English strings describing the current state of an
+i/o handler thread */
+extern const char* srv_io_thread_op_info[];
+extern const char* srv_io_thread_function[];
+
+/* the number of purge threads to use from the worker pool (currently 0 or 1) */
+extern ulong srv_n_purge_threads;
+
+/* the number of pages to purge in one batch */
+extern ulong srv_purge_batch_size;
+
+/* the number of sync wait arrays */
+extern ulong srv_sync_array_size;
+
+/* print all user-level transactions deadlocks to mysqld stderr */
+extern my_bool srv_print_all_deadlocks;
+
+extern my_bool	srv_cmp_per_index_enabled;
+
+/** Status variables to be passed to MySQL */
+extern struct export_var_t export_vars;
+
+/** Global counters */
+extern srv_stats_t	srv_stats;
+
+# ifdef UNIV_PFS_THREAD
+/* Keys to register InnoDB threads with performance schema */
+extern mysql_pfs_key_t	buf_page_cleaner_thread_key;
+extern mysql_pfs_key_t	trx_rollback_clean_thread_key;
+extern mysql_pfs_key_t	io_handler_thread_key;
+extern mysql_pfs_key_t	srv_lock_timeout_thread_key;
+extern mysql_pfs_key_t	srv_error_monitor_thread_key;
+extern mysql_pfs_key_t	srv_monitor_thread_key;
+extern mysql_pfs_key_t	srv_master_thread_key;
+extern mysql_pfs_key_t	srv_purge_thread_key;
+extern mysql_pfs_key_t	recv_writer_thread_key;
+
+/* This macro registers the current thread and its key with the
+performance schema */
+#  define pfs_register_thread(key)			\
+do {								\
+	struct PSI_thread* psi = PSI_THREAD_CALL(new_thread)(key, NULL, 0);\
+	PSI_THREAD_CALL(set_thread)(psi);			\
+} while (0)
+
+/* This macro delists the current thread from the performance schema */
+#  define pfs_delete_thread()				\
+do {								\
+	PSI_THREAD_CALL(delete_current_thread)();		\
+} while (0)
+# endif /* UNIV_PFS_THREAD */
+
+#endif /* !UNIV_HOTBACKUP */
+
+/** Types of raw partitions in innodb_data_file_path */
+enum {
+	SRV_NOT_RAW = 0,	/*!< Not a raw partition */
+	SRV_NEW_RAW,		/*!< A 'newraw' partition, only to be
+				initialized */
+	SRV_OLD_RAW		/*!< An initialized raw partition */
+};
+
+/** Alternatives for the file flush option in Unix; see the InnoDB manual
+about what these mean */
+enum {
+	SRV_UNIX_FSYNC = 1,	/*!< fsync, the default */
+	SRV_UNIX_O_DSYNC,	/*!< open log files in O_SYNC mode */
+	SRV_UNIX_LITTLESYNC,	/*!< do not call os_file_flush()
+				when writing data files, but do flush
+				after writing to log files */
+	SRV_UNIX_NOSYNC,	/*!< do not flush after writing */
+	SRV_UNIX_O_DIRECT,	/*!< invoke os_file_set_nocache() on
+				data files. This implies using
+				non-buffered IO but still using fsync,
+				the reason for which is that some FS
+				do not flush meta-data when
+				unbuffered IO happens */
+	SRV_UNIX_O_DIRECT_NO_FSYNC
+				/*!< do not use fsync() when using
+				direct IO i.e.: it can be set to avoid
+				the fsync() call that we make when
+				using SRV_UNIX_O_DIRECT. However, in
+				this case user/DBA should be sure about
+				the integrity of the meta-data */
+};
+
+/** Alternatives for file i/o in Windows */
+enum {
+	SRV_WIN_IO_NORMAL = 1,	/*!< buffered I/O */
+	SRV_WIN_IO_UNBUFFERED	/*!< unbuffered I/O; this is the default */
+};
+
+/** Alternatives for srv_force_recovery. Non-zero values are intended
+to help the user get a damaged database up so that he can dump intact
+tables and rows with SELECT INTO OUTFILE. The database must not otherwise
+be used with these options! A bigger number below means that all precautions
+of lower numbers are included. */
+enum {
+	SRV_FORCE_IGNORE_CORRUPT = 1,	/*!< let the server run even if it
+					detects a corrupt page */
+	SRV_FORCE_NO_BACKGROUND	= 2,	/*!< prevent the main thread from
+					running: if a crash would occur
+					in purge, this prevents it */
+	SRV_FORCE_NO_TRX_UNDO = 3,	/*!< do not run trx rollback after
+					recovery */
+	SRV_FORCE_NO_IBUF_MERGE = 4,	/*!< prevent also ibuf operations:
+					if they would cause a crash, better
+					not do them */
+	SRV_FORCE_NO_UNDO_LOG_SCAN = 5,	/*!< do not look at undo logs when
+					starting the database: InnoDB will
+					treat even incomplete transactions
+					as committed */
+	SRV_FORCE_NO_LOG_REDO = 6	/*!< do not do the log roll-forward
+					in connection with recovery */
+};
+
+/* Alternatives for srv_innodb_stats_method, which could be changed by
+setting innodb_stats_method */
+enum srv_stats_method_name_enum {
+	SRV_STATS_NULLS_EQUAL,		/* All NULL values are treated as
+					equal. This is the default setting
+					for innodb_stats_method */
+	SRV_STATS_NULLS_UNEQUAL,	/* All NULL values are treated as
+					NOT equal. */
+	SRV_STATS_NULLS_IGNORED		/* NULL values are ignored */
+};
+
+typedef enum srv_stats_method_name_enum		srv_stats_method_name_t;
+
+#ifndef UNIV_HOTBACKUP
+/** Types of threads existing in the system. */
+enum srv_thread_type {
+	SRV_NONE,			/*!< None */
+	SRV_WORKER,			/*!< threads serving parallelized
+					queries and queries released from
+					lock wait */
+	SRV_PURGE,			/*!< Purge coordinator thread */
+	SRV_MASTER			/*!< the master thread, (whose type
+					number must be biggest) */
+};
+
+/*********************************************************************//**
+Boots Innobase server. */
+UNIV_INTERN
+void
+srv_boot(void);
+/*==========*/
+/*********************************************************************//**
+Initializes the server. */
+UNIV_INTERN
+void
+srv_init(void);
+/*==========*/
+/*********************************************************************//**
+Frees the data structures created in srv_init(). */
+UNIV_INTERN
+void
+srv_free(void);
+/*==========*/
+/*********************************************************************//**
+Initializes the synchronization primitives, memory system, and the thread
+local storage. */
+UNIV_INTERN
+void
+srv_general_init(void);
+/*==================*/
+/*********************************************************************//**
+Sets the info describing an i/o thread current state. */
+UNIV_INTERN
+void
+srv_set_io_thread_op_info(
+/*======================*/
+	ulint		i,	/*!< in: the 'segment' of the i/o thread */
+	const char*	str);	/*!< in: constant char string describing the
+				state */
+/*********************************************************************//**
+Resets the info describing an i/o thread current state. */
+UNIV_INTERN
+void
+srv_reset_io_thread_op_info();
+/*=========================*/
+/*******************************************************************//**
+Tells the purge thread that there has been activity in the database
+and wakes up the purge thread if it is suspended (not sleeping).  Note
+that there is a small chance that the purge thread stays suspended
+(we do not protect our operation with the srv_sys_t::mutex, for
+performance reasons). */
+UNIV_INTERN
+void
+srv_wake_purge_thread_if_not_active(void);
+/*=====================================*/
+/*******************************************************************//**
+Tells the Innobase server that there has been activity in the database
+and wakes up the master thread if it is suspended (not sleeping). Used
+in the MySQL interface. Note that there is a small chance that the master
+thread stays suspended (we do not protect our operation with the kernel
+mutex, for performance reasons). */
+UNIV_INTERN
+void
+srv_active_wake_master_thread(void);
+/*===============================*/
+/*******************************************************************//**
+Wakes up the master thread if it is suspended or being suspended. */
+UNIV_INTERN
+void
+srv_wake_master_thread(void);
+/*========================*/
+/******************************************************************//**
+Outputs to a file the output of the InnoDB Monitor.
+@return FALSE if not all information printed
+due to failure to obtain necessary mutex */
+UNIV_INTERN
+ibool
+srv_printf_innodb_monitor(
+/*======================*/
+	FILE*	file,		/*!< in: output stream */
+	ibool	nowait,		/*!< in: whether to wait for the
+				lock_sys_t::mutex */
+	ulint*	trx_start,	/*!< out: file position of the start of
+				the list of active transactions */
+	ulint*	trx_end);	/*!< out: file position of the end of
+				the list of active transactions */
+
+/******************************************************************//**
+Function to pass InnoDB status variables to MySQL */
+UNIV_INTERN
+void
+srv_export_innodb_status(void);
+/*==========================*/
+/*******************************************************************//**
+Get current server activity count. We don't hold srv_sys::mutex while
+reading this value as it is only used in heuristics.
+@return activity count. */
+UNIV_INTERN
+ulint
+srv_get_activity_count(void);
+/*========================*/
+/*******************************************************************//**
+Check if there has been any activity.
+@return FALSE if no change in activity counter. */
+UNIV_INTERN
+ibool
+srv_check_activity(
+/*===============*/
+	ulint		old_activity_count);	/*!< old activity count */
+/******************************************************************//**
+Increment the server activity counter. */
+UNIV_INTERN
+void
+srv_inc_activity_count(void);
+/*=========================*/
+
+/**********************************************************************//**
+Enqueues a task to server task queue and releases a worker thread, if there
+is a suspended one. */
+UNIV_INTERN
+void
+srv_que_task_enqueue_low(
+/*=====================*/
+	que_thr_t*	thr);	/*!< in: query thread */
+
+/**********************************************************************//**
+Check whether any background thread is active. If so, return the thread
+type.
+@return SRV_NONE if all are suspended or have exited, thread
+type if any are still active. */
+UNIV_INTERN
+enum srv_thread_type
+srv_get_active_thread_type(void);
+/*============================*/
+
+extern "C" {
+
+/*********************************************************************//**
+A thread which prints the info output by various InnoDB monitors.
+@return	a dummy parameter */
+UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(srv_monitor_thread)(
+/*===============================*/
+	void*	arg);	/*!< in: a dummy parameter required by
+			os_thread_create */
+
+/*********************************************************************//**
+The master thread controlling the server.
+@return	a dummy parameter */
+UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(srv_master_thread)(
+/*==============================*/
+	void*	arg);	/*!< in: a dummy parameter required by
+			os_thread_create */
+
+/*********************************************************************//**
+A thread which prints warnings about semaphore waits which have lasted
+too long. These can be used to track bugs which cause hangs.
+@return	a dummy parameter */
+UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(srv_error_monitor_thread)(
+/*=====================================*/
+	void*	arg);	/*!< in: a dummy parameter required by
+			os_thread_create */
+
+/*********************************************************************//**
+Purge coordinator thread that schedules the purge tasks.
+@return	a dummy parameter */
+UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(srv_purge_coordinator_thread)(
+/*=========================================*/
+	void*	arg __attribute__((unused)));	/*!< in: a dummy parameter
+						required by os_thread_create */
+
+/*********************************************************************//**
+Worker thread that reads tasks from the work queue and executes them.
+@return	a dummy parameter */
+UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(srv_worker_thread)(
+/*==============================*/
+	void*	arg __attribute__((unused)));	/*!< in: a dummy parameter
+						required by os_thread_create */
+} /* extern "C" */
+
+/**********************************************************************//**
+Get count of tasks in the queue.
+@return number of tasks in queue  */
+UNIV_INTERN
+ulint
+srv_get_task_queue_length(void);
+/*===========================*/
+
+/*********************************************************************//**
+Releases threads of the type given from suspension in the thread table.
+NOTE! The server mutex has to be reserved by the caller!
+@return number of threads released: this may be less than n if not
+enough threads were suspended at the moment */
+UNIV_INTERN
+ulint
+srv_release_threads(
+/*================*/
+	enum srv_thread_type	type,	/*!< in: thread type */
+	ulint			n);	/*!< in: number of threads to release */
+
+/**********************************************************************//**
+Check whether any background threads are active. If so print which thread
+is active. Send the threads wakeup signal.
+@return name of thread that is active or NULL */
+UNIV_INTERN
+const char*
+srv_any_background_threads_are_active(void);
+/*=======================================*/
+
+/**********************************************************************//**
+Wakeup the purge threads. */
+UNIV_INTERN
+void
+srv_purge_wakeup(void);
+/*==================*/
+
+/** Status variables to be passed to MySQL. The comment on each member
+names or describes the InnoDB counter it reports. Members inside
+UNIV_DEBUG blocks exist only in debug builds.
+NOTE(review): presumably filled in by srv_export_innodb_status() - confirm. */
+struct export_var_t{
+	ulint innodb_data_pending_reads;	/*!< Pending reads */
+	ulint innodb_data_pending_writes;	/*!< Pending writes */
+	ulint innodb_data_pending_fsyncs;	/*!< Pending fsyncs */
+	ulint innodb_data_fsyncs;		/*!< Number of fsyncs so far */
+	ulint innodb_data_read;			/*!< Data bytes read */
+	ulint innodb_data_writes;		/*!< I/O write requests */
+	ulint innodb_data_written;		/*!< Data bytes written */
+	ulint innodb_data_reads;		/*!< I/O read requests */
+	char  innodb_buffer_pool_dump_status[512];/*!< Buf pool dump status */
+	char  innodb_buffer_pool_load_status[512];/*!< Buf pool load status */
+	ulint innodb_buffer_pool_pages_total;	/*!< Buffer pool size */
+	ulint innodb_buffer_pool_pages_data;	/*!< Data pages */
+	ulint innodb_buffer_pool_bytes_data;	/*!< File bytes used */
+	ulint innodb_buffer_pool_pages_dirty;	/*!< Dirty data pages */
+	ulint innodb_buffer_pool_bytes_dirty;	/*!< File bytes modified */
+	ulint innodb_buffer_pool_pages_misc;	/*!< Miscellaneous pages */
+	ulint innodb_buffer_pool_pages_free;	/*!< Free pages */
+#ifdef UNIV_DEBUG
+	ulint innodb_buffer_pool_pages_latched;	/*!< Latched pages */
+#endif /* UNIV_DEBUG */
+	ulint innodb_buffer_pool_read_requests;	/*!< buf_pool->stat.n_page_gets */
+	ulint innodb_buffer_pool_reads;		/*!< srv_buf_pool_reads */
+	ulint innodb_buffer_pool_wait_free;	/*!< srv_buf_pool_wait_free */
+	ulint innodb_buffer_pool_pages_flushed;	/*!< srv_buf_pool_flushed */
+	ulint innodb_buffer_pool_write_requests;/*!< srv_buf_pool_write_requests */
+	ulint innodb_buffer_pool_read_ahead_rnd;/*!< srv_read_ahead_rnd */
+	ulint innodb_buffer_pool_read_ahead;	/*!< srv_read_ahead */
+	ulint innodb_buffer_pool_read_ahead_evicted;/*!< srv_read_ahead evicted */
+	ulint innodb_dblwr_pages_written;	/*!< srv_dblwr_pages_written */
+	ulint innodb_dblwr_writes;		/*!< srv_dblwr_writes */
+	ibool innodb_have_atomic_builtins;	/*!< HAVE_ATOMIC_BUILTINS */
+	ulint innodb_log_waits;			/*!< srv_log_waits */
+	ulint innodb_log_write_requests;	/*!< srv_log_write_requests */
+	ulint innodb_log_writes;		/*!< srv_log_writes */
+	lsn_t innodb_os_log_written;		/*!< srv_os_log_written */
+	ulint innodb_os_log_fsyncs;		/*!< fil_n_log_flushes */
+	ulint innodb_os_log_pending_writes;	/*!< srv_os_log_pending_writes */
+	ulint innodb_os_log_pending_fsyncs;	/*!< fil_n_pending_log_flushes */
+	ulint innodb_page_size;			/*!< UNIV_PAGE_SIZE */
+	ulint innodb_pages_created;		/*!< buf_pool->stat.n_pages_created */
+	ulint innodb_pages_read;		/*!< buf_pool->stat.n_pages_read */
+	ulint innodb_pages_written;		/*!< buf_pool->stat.n_pages_written */
+	ulint innodb_row_lock_waits;		/*!< srv_n_lock_wait_count */
+	ulint innodb_row_lock_current_waits;	/*!< srv_n_lock_wait_current_count */
+	ib_int64_t innodb_row_lock_time;	/*!< srv_n_lock_wait_time
+						/ 1000 */
+	ulint innodb_row_lock_time_avg;		/*!< srv_n_lock_wait_time
+						/ 1000
+						/ srv_n_lock_wait_count */
+	ulint innodb_row_lock_time_max;		/*!< srv_n_lock_max_wait_time
+						/ 1000 */
+	ulint innodb_rows_read;			/*!< srv_n_rows_read */
+	ulint innodb_rows_inserted;		/*!< srv_n_rows_inserted */
+	ulint innodb_rows_updated;		/*!< srv_n_rows_updated */
+	ulint innodb_rows_deleted;		/*!< srv_n_rows_deleted */
+	ulint innodb_num_open_files;		/*!< fil_n_file_opened */
+	ulint innodb_truncated_status_writes;	/*!< srv_truncated_status_writes */
+	ulint innodb_available_undo_logs;       /*!< srv_available_undo_logs */
+#ifdef UNIV_DEBUG
+	ulint innodb_purge_trx_id_age;		/*!< rw_max_trx_id - purged trx_id */
+	ulint innodb_purge_view_trx_id_age;	/*!< rw_max_trx_id
+						- purged view's min trx_id */
+#endif /* UNIV_DEBUG */
+};
+
+/** Thread slot in the thread table. A slot records the type and suspension
+state of one thread together with the event the thread waits on while it
+is suspended. */
+struct srv_slot_t{
+	srv_thread_type type;			/*!< thread type, a value of
+						enum srv_thread_type */
+	ibool		in_use;			/*!< TRUE if this slot
+						is in use */
+	ibool		suspended;		/*!< TRUE if the thread is
+						waiting for the event of this
+						slot */
+	ib_time_t	suspend_time;		/*!< time when the thread was
+						suspended. Initialized by
+						lock_wait_table_reserve_slot()
+						for lock wait */
+	ulong		wait_timeout;		/*!< wait time that if exceeded
+						the thread will be timed out.
+						Initialized by
+						lock_wait_table_reserve_slot()
+						for lock wait */
+	os_event_t	event;			/*!< event used in suspending
+						the thread when it has nothing
+						to do */
+	que_thr_t*	thr;			/*!< suspended query thread
+						(only used for user threads) */
+};
+
+#else /* !UNIV_HOTBACKUP */
+# define srv_use_adaptive_hash_indexes		FALSE
+# define srv_use_native_aio			FALSE
+# define srv_force_recovery			0UL
+# define srv_set_io_thread_op_info(t,info)	((void) 0)
+# define srv_reset_io_thread_op_info()		((void) 0)
+# define srv_is_being_started			0
+# define srv_win_file_flush_method		SRV_WIN_IO_UNBUFFERED
+# define srv_unix_file_flush_method		SRV_UNIX_O_DSYNC
+# define srv_start_raw_disk_in_use		0
+# define srv_file_per_table			1
+#endif /* !UNIV_HOTBACKUP */
+
+#endif
diff --git a/storage/innobase/include/srv0srv.ic b/storage/innobase/include/srv0srv.ic
new file mode 100644
index 00000000000..53405c06f97
--- /dev/null
+++ b/storage/innobase/include/srv0srv.ic
@@ -0,0 +1,24 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/srv0srv.ic
+Server main program
+
+Created 10/4/1995 Heikki Tuuri
+*******************************************************/
diff --git a/storage/innobase/include/srv0start.h b/storage/innobase/include/srv0start.h
new file mode 100644
index 00000000000..40d502f4459
--- /dev/null
+++ b/storage/innobase/include/srv0start.h
@@ -0,0 +1,167 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/srv0start.h
+Starts the Innobase database server
+
+Created 10/10/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef srv0start_h
+#define srv0start_h
+
+#include "univ.i"
+#include "log0log.h"
+#include "ut0byte.h"
+
+#ifdef __WIN__
+#define SRV_PATH_SEPARATOR	'\\'
+#else
+#define SRV_PATH_SEPARATOR	'/'
+#endif
+
+/*********************************************************************//**
+Normalizes a directory path for Windows: converts slashes to backslashes. */
+UNIV_INTERN
+void
+srv_normalize_path_for_win(
+/*=======================*/
+	char*	str);	/*!< in/out: null-terminated character string */
+/*********************************************************************//**
+Reads the data files and their sizes from a character string given in
+the .cnf file.
+@return	TRUE if ok, FALSE on parse error */
+UNIV_INTERN
+ibool
+srv_parse_data_file_paths_and_sizes(
+/*================================*/
+	char*	str);	/*!< in/out: the data file path string */
+/*********************************************************************//**
+Frees the memory allocated by srv_parse_data_file_paths_and_sizes()
+and srv_parse_log_group_home_dirs(). */
+UNIV_INTERN
+void
+srv_free_paths_and_sizes(void);
+/*==========================*/
+/*********************************************************************//**
+Adds a slash or a backslash to the end of a string if it is missing
+and the string is not empty.
+@return	string which has the separator if the string is not empty */
+UNIV_INTERN
+char*
+srv_add_path_separator_if_needed(
+/*=============================*/
+	char*	str);	/*!< in: null-terminated character string */
+#ifndef UNIV_HOTBACKUP
+/****************************************************************//**
+Starts Innobase and creates a new database if database files
+are not found and the user wants.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+innobase_start_or_create_for_mysql(void);
+/*====================================*/
+/****************************************************************//**
+Shuts down the Innobase database.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+innobase_shutdown_for_mysql(void);
+
+/****************************************************************//**
+Signal all per-table background threads to shutdown, and wait for them to do
+so. */
+UNIV_INTERN
+void
+srv_shutdown_table_bg_threads(void);
+/*=============================*/
+
+/*************************************************************//**
+Copy the file path component of the physical file to parameter. It will
+copy up to and including the terminating path separator.
+@return number of bytes copied or ULINT_UNDEFINED if destination buffer
+	is smaller than the path to be copied. */
+UNIV_INTERN
+ulint
+srv_path_copy(
+/*==========*/
+	char*		dest,		/*!< out: destination buffer */
+	ulint		dest_len,	/*!< in: max bytes to copy */
+	const char*	basedir,	/*!< in: base directory */
+	const char*	table_name)	/*!< in: source table name */
+	__attribute__((nonnull, warn_unused_result));
+
+/*****************************************************************//**
+Get the meta-data filename from the table name. */
+UNIV_INTERN
+void
+srv_get_meta_data_filename(
+/*======================*/
+	dict_table_t*	table,		/*!< in: table */
+	char*			filename,	/*!< out: filename */
+	ulint			max_len)	/*!< in: filename max length */
+	__attribute__((nonnull));
+
+/** Log sequence number at shutdown */
+extern	lsn_t	srv_shutdown_lsn;
+/** Log sequence number immediately after startup */
+extern	lsn_t	srv_start_lsn;
+
+#ifdef HAVE_DARWIN_THREADS
+/** TRUE if the F_FULLFSYNC option is available */
+extern	ibool	srv_have_fullfsync;
+#endif
+
+/** TRUE if the server is being started */
+extern	ibool	srv_is_being_started;
+/** TRUE if the server was successfully started */
+extern	ibool	srv_was_started;
+/** TRUE if the server is being started, before rolling back any
+incomplete transactions */
+extern	ibool	srv_startup_is_before_trx_rollback_phase;
+
+/** TRUE if a raw partition is in use */
+extern	ibool	srv_start_raw_disk_in_use;
+
+
+/** Shutdown state.
+NOTE(review): the enumerators appear to be declared in the order in which
+a shutdown advances through them - confirm before reordering or inserting
+values. */
+enum srv_shutdown_state {
+	SRV_SHUTDOWN_NONE = 0,	/*!< Database running normally */
+	SRV_SHUTDOWN_CLEANUP,	/*!< Cleaning up in
+				logs_empty_and_mark_files_at_shutdown() */
+	SRV_SHUTDOWN_FLUSH_PHASE,/*!< At this phase the master and the
+				purge threads must have completed their
+				work. Once we enter this phase the
+				page_cleaner can clean up the buffer
+				pool and exit */
+	SRV_SHUTDOWN_LAST_PHASE,/*!< Last phase after ensuring that
+				the buffer pool can be freed: flush
+				all file spaces and close all files */
+	SRV_SHUTDOWN_EXIT_THREADS/*!< Exit all threads */
+};
+
+/** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to
+SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */
+extern	enum srv_shutdown_state	srv_shutdown_state;
+#endif /* !UNIV_HOTBACKUP */
+
+/** Log 'spaces' have id's >= this */
+#define SRV_LOG_SPACE_FIRST_ID		0xFFFFFFF0UL
+
+#endif
diff --git a/storage/innobase/include/sync0arr.h b/storage/innobase/include/sync0arr.h
new file mode 100644
index 00000000000..15dbdcb540d
--- /dev/null
+++ b/storage/innobase/include/sync0arr.h
@@ -0,0 +1,155 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/sync0arr.h
+The wait array used in synchronization primitives
+
+Created 9/5/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef sync0arr_h
+#define sync0arr_h
+
+#include "univ.i"
+#include "ut0lst.h"
+#include "ut0mem.h"
+#include "os0thread.h"
+
+/** Synchronization wait array cell */
+struct sync_cell_t;
+/** Synchronization wait array */
+struct sync_array_t;
+
+/******************************************************************//**
+Get an instance of the sync wait array and reserve a wait array cell
+in the instance for waiting for an object. The event of the cell is
+reset to nonsignalled state.
+If reserving cell of the instance fails, try to get another new
+instance until we can reserve an empty cell of it.
+@return the instance found, never NULL. */
+UNIV_INLINE
+sync_array_t*
+sync_array_get_and_reserve_cell(
+/*============================*/
+	void*		object,	/*!< in: pointer to the object to wait for */
+	ulint		type,	/*!< in: lock request type */
+	const char*	file,	/*!< in: file where requested */
+	ulint		line,	/*!< in: line where requested */
+	ulint*		index);	/*!< out: index of the reserved cell */
+/******************************************************************//**
+Reserves a wait array cell for waiting for an object.
+The event of the cell is reset to nonsignalled state.
+@return true if free cell is found, otherwise false */
+UNIV_INTERN
+bool
+sync_array_reserve_cell(
+/*====================*/
+	sync_array_t*	arr,	/*!< in: wait array */
+	void*		object, /*!< in: pointer to the object to wait for */
+	ulint		type,	/*!< in: lock request type */
+	const char*	file,	/*!< in: file where requested */
+	ulint		line,	/*!< in: line where requested */
+	ulint*		index); /*!< out: index of the reserved cell */
+/******************************************************************//**
+This function should be called when a thread starts to wait on
+a wait array cell. In the debug version this function checks
+if the wait for a semaphore will result in a deadlock, in which
+case prints info and asserts. */
+UNIV_INTERN
+void
+sync_array_wait_event(
+/*==================*/
+	sync_array_t*	arr,	/*!< in: wait array */
+	ulint		index);	 /*!< in: index of the reserved cell */
+/******************************************************************//**
+Frees the cell. NOTE! sync_array_wait_event frees the cell
+automatically! */
+UNIV_INTERN
+void
+sync_array_free_cell(
+/*=================*/
+	sync_array_t*	arr,	/*!< in: wait array */
+	ulint		index);	/*!< in: index of the cell in array */
+/**********************************************************************//**
+Note that one of the wait objects was signalled. */
+UNIV_INTERN
+void
+sync_array_object_signalled(void);
+/*=============================*/
+
+/**********************************************************************//**
+If the wakeup algorithm does not work perfectly at semaphore releases,
+this function will do the waking (see the comment in mutex_exit). This
+function should be called about every 1 second in the server. */
+UNIV_INTERN
+void
+sync_arr_wake_threads_if_sema_free(void);
+/*====================================*/
+/**********************************************************************//**
+Prints warnings of long semaphore waits to stderr.
+@return	TRUE if fatal semaphore wait threshold was exceeded */
+UNIV_INTERN
+ibool
+sync_array_print_long_waits(
+/*========================*/
+	os_thread_id_t*	waiter,	/*!< out: longest waiting thread */
+	const void**	sema)	/*!< out: longest-waited-for semaphore */
+	__attribute__((nonnull));
+/********************************************************************//**
+Validates the integrity of the wait array. Checks
+that the number of reserved cells equals the count variable. */
+UNIV_INTERN
+void
+sync_array_validate(
+/*================*/
+	sync_array_t*	arr);	/*!< in: sync wait array */
+/**********************************************************************//**
+Prints info of the wait array. */
+UNIV_INTERN
+void
+sync_array_print(
+/*=============*/
+	FILE*		file);	/*!< in: file where to print */
+
+/**********************************************************************//**
+Create the primary system wait array(s), they are protected by an OS mutex */
+UNIV_INTERN
+void
+sync_array_init(
+/*============*/
+	ulint		n_threads);	/*!< in: Number of slots to create */
+/**********************************************************************//**
+Close sync array wait sub-system. */
+UNIV_INTERN
+void
+sync_array_close(void);
+/*==================*/
+
+/**********************************************************************//**
+Get an instance of the sync wait array. */
+UNIV_INTERN
+sync_array_t*
+sync_array_get(void);
+/*================*/
+
+#ifndef UNIV_NONINL
+#include "sync0arr.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/sync0arr.ic b/storage/innobase/include/sync0arr.ic
new file mode 100644
index 00000000000..18a46dd0a41
--- /dev/null
+++ b/storage/innobase/include/sync0arr.ic
@@ -0,0 +1,64 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/sync0arr.ic
+The wait array for synchronization primitives
+
+Inline code
+
+Created 9/5/1995 Heikki Tuuri
+*******************************************************/
+
+/** User configured sync array size */
+extern ulong srv_sync_array_size;
+
+/******************************************************************//**
+Get an instance of the sync wait array and reserve a wait array cell
+in the instance for waiting for an object. The event of the cell is
+reset to nonsignalled state.
+If reserving a cell of the instance fails, another instance is fetched
+and tried, for at most srv_sync_array_size attempts in total. If no
+attempt succeeds, the ut_a() assertion at the end fails.
+@return the instance in which the cell was reserved, never NULL. */
+UNIV_INLINE
+sync_array_t*
+sync_array_get_and_reserve_cell(
+/*============================*/
+	void*		object,	/*!< in: pointer to the object to wait for */
+	ulint		type,	/*!< in: lock request type */
+	const char*	file,	/*!< in: file where requested */
+	ulint		line,	/*!< in: line where requested */
+	ulint*		index)	/*!< out: index of the reserved cell */
+{
+	/* Initialized so that the returned pointer is well defined on
+	every path the compiler can see, including a loop that runs zero
+	times. */
+	sync_array_t*	sync_arr = NULL;
+	bool		reserved = false;
+
+	/* Fetch an instance and try to reserve a cell in it; stop at the
+	first success or after srv_sync_array_size attempts. */
+	for (ulint i = 0; i < srv_sync_array_size && !reserved; ++i) {
+		sync_arr = sync_array_get();
+		reserved = sync_array_reserve_cell(sync_arr, object, type,
+						   file, line, index);
+	}
+
+	/* The loop is bounded, so reserved can still be false here: either
+	no attempt was made or sync_array_reserve_cell() found no free cell
+	in any instance tried. Assert instead of returning an instance in
+	which nothing was reserved. */
+	ut_a(reserved);
+
+	return sync_arr;
+}
+
diff --git a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
new file mode 100644
index 00000000000..fdcbb1b6fa5
--- /dev/null
+++ b/storage/innobase/include/sync0rw.h
@@ -0,0 +1,813 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/sync0rw.h
+The read-write lock (for threads, not for database transactions)
+
+Created 9/11/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef sync0rw_h
+#define sync0rw_h
+
+#include "univ.i"
+#ifndef UNIV_HOTBACKUP
+#include "ut0lst.h"
+#include "ut0counter.h"
+#include "sync0sync.h"
+#include "os0sync.h"
+
+/* The following undef is to prevent a name conflict with a macro
+in MySQL: */
+#undef rw_lock_t
+#endif /* !UNIV_HOTBACKUP */
+
+/** Counters for RW locks. All members use the same counter type; the
+rw_s_* members describe shared (read) locks and the rw_x_* members
+describe exclusive (write) locks. */
+struct rw_lock_stats_t {
+	/** Counter type shared by all members below: ib_counter_t
+	parameterized with ib_int64_t and IB_N_SLOTS */
+	typedef ib_counter_t<ib_int64_t, IB_N_SLOTS> ib_int64_counter_t;
+
+	/** number of spin waits on rw-latches,
+	incurred by shared (read) locks */
+	ib_int64_counter_t	rw_s_spin_wait_count;
+
+	/** number of spin loop rounds on rw-latches,
+	incurred by shared (read) locks */
+	ib_int64_counter_t	rw_s_spin_round_count;
+
+	/** number of OS waits on rw-latches,
+	incurred by shared (read) locks */
+	ib_int64_counter_t	rw_s_os_wait_count;
+
+	/** number of unlocks (that unlock shared locks),
+	set only when UNIV_SYNC_PERF_STAT is defined */
+	ib_int64_counter_t	rw_s_exit_count;
+
+	/** number of spin waits on rw-latches,
+	incurred by exclusive (write) locks */
+	ib_int64_counter_t	rw_x_spin_wait_count;
+
+	/** number of spin loop rounds on rw-latches,
+	incurred by exclusive (write) locks */
+	ib_int64_counter_t	rw_x_spin_round_count;
+
+	/** number of OS waits on rw-latches,
+	incurred by exclusive (write) locks */
+	ib_int64_counter_t	rw_x_os_wait_count;
+
+	/** number of unlocks (that unlock exclusive locks),
+	set only when UNIV_SYNC_PERF_STAT is defined */
+	ib_int64_counter_t	rw_x_exit_count;
+};
+
+/* Latch types; these are used also in btr0btr.h: keep the numerical values
+smaller than 30 and the order of the numerical values like below! */
+#define RW_S_LATCH	1
+#define	RW_X_LATCH	2
+#define	RW_NO_LATCH	3
+
+#ifndef UNIV_HOTBACKUP
+/* We decrement lock_word by this amount for each x_lock. It is also the
+start value for the lock_word, meaning that it limits the maximum number
+of concurrent read locks before the rw_lock breaks. The current value of
+0x00100000 allows 1,048,575 concurrent readers and 2047 recursive writers.*/
+#define X_LOCK_DECR		0x00100000
+
+struct rw_lock_t;
+#ifdef UNIV_SYNC_DEBUG
+struct rw_lock_debug_t;
+#endif /* UNIV_SYNC_DEBUG */
+
+typedef UT_LIST_BASE_NODE_T(rw_lock_t)	rw_lock_list_t;
+
+extern rw_lock_list_t	rw_lock_list;
+extern ib_mutex_t		rw_lock_list_mutex;
+
+#ifdef UNIV_SYNC_DEBUG
+/* The global mutex which protects the debug info lists of all rw-locks.
+To modify the debug info list of an rw-lock, this mutex has to be
+acquired in addition to the mutex protecting the lock. */
+extern ib_mutex_t		rw_lock_debug_mutex;
+extern os_event_t	rw_lock_debug_event;	/*!< If deadlock detection does
+					not get immediately the mutex it
+					may wait for this event */
+extern ibool		rw_lock_debug_waiters;	/*!< This is set to TRUE, if
+					there may be waiters for the event */
+#endif /* UNIV_SYNC_DEBUG */
+
+/** Counters for RW locks. */
+extern rw_lock_stats_t	rw_lock_stats;
+
+#ifdef UNIV_PFS_RWLOCK
+/* Following are rwlock keys used to register with MySQL
+performance schema */
+# ifdef UNIV_LOG_ARCHIVE
+extern	mysql_pfs_key_t	archive_lock_key;
+# endif /* UNIV_LOG_ARCHIVE */
+extern	mysql_pfs_key_t btr_search_latch_key;
+extern	mysql_pfs_key_t	buf_block_lock_key;
+# ifdef UNIV_SYNC_DEBUG
+extern	mysql_pfs_key_t	buf_block_debug_latch_key;
+# endif /* UNIV_SYNC_DEBUG */
+extern	mysql_pfs_key_t	dict_operation_lock_key;
+extern	mysql_pfs_key_t	checkpoint_lock_key;
+extern	mysql_pfs_key_t	fil_space_latch_key;
+extern	mysql_pfs_key_t	fts_cache_rw_lock_key;
+extern	mysql_pfs_key_t	fts_cache_init_rw_lock_key;
+extern	mysql_pfs_key_t	trx_i_s_cache_lock_key;
+extern	mysql_pfs_key_t	trx_purge_latch_key;
+extern	mysql_pfs_key_t	index_tree_rw_lock_key;
+extern	mysql_pfs_key_t	index_online_log_key;
+extern	mysql_pfs_key_t	dict_table_stats_key;
+extern  mysql_pfs_key_t trx_sys_rw_lock_key;
+extern  mysql_pfs_key_t hash_table_rw_lock_key;
+#endif /* UNIV_PFS_RWLOCK */
+
+
+#ifndef UNIV_PFS_RWLOCK
+/******************************************************************//**
+Creates, or rather, initializes an rw-lock object in a specified memory
+location (which must be appropriately aligned). The rw-lock is initialized
+to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
+is necessary only if the memory block containing it is freed.
+If MySQL performance schema is enabled and "UNIV_PFS_RWLOCK" is
+defined, the rw-locks are instrumented with performance schema probes. */
+/* Non-instrumented build: rw_lock_create() expands directly to
+rw_lock_create_func(). The key argument K is not forwarded here. The
+stringified lock expression (#L) is passed only under UNIV_DEBUG and the
+level only under UNIV_SYNC_DEBUG, matching the conditional parameters of
+rw_lock_create_func(). */
+# ifdef UNIV_DEBUG
+#  ifdef UNIV_SYNC_DEBUG
+#   define rw_lock_create(K, L, level)				\
+	rw_lock_create_func((L), (level), #L, __FILE__, __LINE__)
+#  else	/* UNIV_SYNC_DEBUG */
+#   define rw_lock_create(K, L, level)				\
+	rw_lock_create_func((L), #L, __FILE__, __LINE__)
+#  endif/* UNIV_SYNC_DEBUG */
+# else /* UNIV_DEBUG */
+#  define rw_lock_create(K, L, level)				\
+	rw_lock_create_func((L), __FILE__, __LINE__)
+# endif	/* UNIV_DEBUG */
+
+/**************************************************************//**
+NOTE! The following macros should be used in rw locking and
+unlocking, not the corresponding function. */
+
+/* Shared-mode locking. Arguments: M = rw-lock, P = pass value,
+F and L = file name and line of the requesting call site. Macros without
+F/L arguments supply __FILE__ and __LINE__ of the expansion site, and
+macros without a P argument supply a pass value of 0. */
+# define rw_lock_s_lock(M)					\
+	rw_lock_s_lock_func((M), 0, __FILE__, __LINE__)
+
+# define rw_lock_s_lock_inline(M, P, F, L)			\
+	rw_lock_s_lock_func((M), (P), (F), (L))
+
+# define rw_lock_s_lock_gen(M, P)				\
+	rw_lock_s_lock_func((M), (P), __FILE__, __LINE__)
+
+/* The two nowait forms expand to rw_lock_s_lock_low(), which performs no
+spinning and returns TRUE on success. */
+# define rw_lock_s_lock_gen_nowait(M, P)			\
+	rw_lock_s_lock_low((M), (P), __FILE__, __LINE__)
+
+# define rw_lock_s_lock_nowait(M, F, L)				\
+	rw_lock_s_lock_low((M), 0, (F), (L))
+
+/* Shared-mode unlock. The macro takes (lock, pass); the pass value P is
+forwarded, as the first argument, only under UNIV_SYNC_DEBUG and is
+dropped otherwise. */
+# ifdef UNIV_SYNC_DEBUG
+#  define rw_lock_s_unlock_gen(L, P)	rw_lock_s_unlock_func(P, L)
+# else
+#  define rw_lock_s_unlock_gen(L, P)	rw_lock_s_unlock_func(L)
+# endif
+
+
+/* Exclusive-mode locking; argument conventions as for the shared-mode
+macros above. */
+# define rw_lock_x_lock(M)					\
+	rw_lock_x_lock_func((M), 0, __FILE__, __LINE__)
+
+# define rw_lock_x_lock_inline(M, P, F, L)			\
+	rw_lock_x_lock_func((M), (P), (F), (L))
+
+# define rw_lock_x_lock_gen(M, P)				\
+	rw_lock_x_lock_func((M), (P), __FILE__, __LINE__)
+
+/* The nowait forms expand to rw_lock_x_lock_func_nowait(), which returns
+TRUE only if the lock can be obtained immediately. */
+# define rw_lock_x_lock_nowait(M)				\
+	rw_lock_x_lock_func_nowait((M), __FILE__, __LINE__)
+
+# define rw_lock_x_lock_func_nowait_inline(M, F, L)		\
+	rw_lock_x_lock_func_nowait((M), (F), (L))
+
+/* Exclusive-mode unlock; P is forwarded only under UNIV_SYNC_DEBUG. */
+# ifdef UNIV_SYNC_DEBUG
+#  define rw_lock_x_unlock_gen(L, P)	rw_lock_x_unlock_func(P, L)
+# else
+#  define rw_lock_x_unlock_gen(L, P)	rw_lock_x_unlock_func(L)
+# endif
+
+# define rw_lock_free(M)		rw_lock_free_func(M)
+
+#else /* !UNIV_PFS_RWLOCK */
+
+/* Following macros point to Performance Schema instrumented functions.
+They take the same arguments as the non-instrumented set, but expand to
+the pfs_-prefixed wrappers. For rw_lock_create() the key argument K is
+forwarded as the first argument of pfs_rw_lock_create_func(); the
+stringified lock expression (#L) is passed only under UNIV_DEBUG and the
+level only under UNIV_SYNC_DEBUG. */
+# ifdef UNIV_DEBUG
+#  ifdef UNIV_SYNC_DEBUG
+#   define rw_lock_create(K, L, level)				\
+	pfs_rw_lock_create_func((K), (L), (level), #L, __FILE__, __LINE__)
+#  else	/* UNIV_SYNC_DEBUG */
+#   define rw_lock_create(K, L, level)				\
+	pfs_rw_lock_create_func((K), (L), #L, __FILE__, __LINE__)
+#  endif/* UNIV_SYNC_DEBUG */
+# else	/* UNIV_DEBUG */
+#  define rw_lock_create(K, L, level)				\
+	pfs_rw_lock_create_func((K), (L), __FILE__, __LINE__)
+# endif	/* UNIV_DEBUG */
+
+/******************************************************************
+NOTE! The following macros should be used in rw locking and
+unlocking, not the corresponding function. */
+
+/* Shared-mode locking. Arguments: M = rw-lock, P = pass value,
+F and L = file name and line of the requesting call site. Macros without
+F/L arguments supply __FILE__ and __LINE__ of the expansion site, and
+macros without a P argument supply a pass value of 0. */
+# define rw_lock_s_lock(M)					\
+	pfs_rw_lock_s_lock_func((M), 0, __FILE__, __LINE__)
+
+# define rw_lock_s_lock_inline(M, P, F, L)			\
+	pfs_rw_lock_s_lock_func((M), (P), (F), (L))
+
+# define rw_lock_s_lock_gen(M, P)				\
+	pfs_rw_lock_s_lock_func((M), (P), __FILE__, __LINE__)
+
+/* The two nowait forms expand to pfs_rw_lock_s_lock_low(). */
+# define rw_lock_s_lock_gen_nowait(M, P)			\
+	pfs_rw_lock_s_lock_low((M), (P), __FILE__, __LINE__)
+
+# define rw_lock_s_lock_nowait(M, F, L)				\
+	pfs_rw_lock_s_lock_low((M), 0, (F), (L))
+
+/* Shared-mode unlock. The macro takes (lock, pass); the pass value P is
+forwarded, as the first argument, only under UNIV_SYNC_DEBUG and is
+dropped otherwise. */
+# ifdef UNIV_SYNC_DEBUG
+#  define rw_lock_s_unlock_gen(L, P)	pfs_rw_lock_s_unlock_func(P, L)
+# else
+#  define rw_lock_s_unlock_gen(L, P)	pfs_rw_lock_s_unlock_func(L)
+# endif
+
+/* Exclusive-mode locking; argument conventions as for the shared-mode
+macros above. */
+# define rw_lock_x_lock(M)					\
+	pfs_rw_lock_x_lock_func((M), 0, __FILE__, __LINE__)
+
+# define rw_lock_x_lock_inline(M, P, F, L)			\
+	pfs_rw_lock_x_lock_func((M), (P), (F), (L))
+
+# define rw_lock_x_lock_gen(M, P)				\
+	pfs_rw_lock_x_lock_func((M), (P), __FILE__, __LINE__)
+
+/* The nowait forms expand to pfs_rw_lock_x_lock_func_nowait(). */
+# define rw_lock_x_lock_nowait(M)				\
+	pfs_rw_lock_x_lock_func_nowait((M), __FILE__, __LINE__)
+
+# define rw_lock_x_lock_func_nowait_inline(M, F, L)		\
+	pfs_rw_lock_x_lock_func_nowait((M), (F), (L))
+
+/* Exclusive-mode unlock; P is forwarded only under UNIV_SYNC_DEBUG. */
+# ifdef UNIV_SYNC_DEBUG
+#  define rw_lock_x_unlock_gen(L, P)	pfs_rw_lock_x_unlock_func(P, L)
+# else
+#  define rw_lock_x_unlock_gen(L, P)	pfs_rw_lock_x_unlock_func(L)
+# endif
+
+# define rw_lock_free(M)		pfs_rw_lock_free_func(M)
+
+#endif /* UNIV_PFS_RWLOCK */
+
+/* Unlock shorthands: expand to the corresponding _gen macro with a pass
+value of 0. They are defined once here, after the #ifndef UNIV_PFS_RWLOCK
+conditional, so they pick up whichever _gen definition is active. */
+#define rw_lock_s_unlock(L)		rw_lock_s_unlock_gen(L, 0)
+#define rw_lock_x_unlock(L)		rw_lock_x_unlock_gen(L, 0)
+
+/******************************************************************//**
+Creates, or rather, initializes an rw-lock object in a specified memory
+location (which must be appropriately aligned). The rw-lock is initialized
+to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
+is necessary only if the memory block containing it is freed. */
+UNIV_INTERN
+void
+rw_lock_create_func(
+/*================*/
+	rw_lock_t*	lock,		/*!< in: pointer to memory */
+#ifdef UNIV_DEBUG
+# ifdef UNIV_SYNC_DEBUG
+	ulint		level,		/*!< in: level */
+# endif /* UNIV_SYNC_DEBUG */
+	const char*	cmutex_name,	/*!< in: mutex name */
+#endif /* UNIV_DEBUG */
+	const char*	cfile_name,	/*!< in: file name where created */
+	ulint		cline);		/*!< in: file line where created */
+/******************************************************************//**
+Calling this function is obligatory only if the memory buffer containing
+the rw-lock is freed. Removes an rw-lock object from the global list. The
+rw-lock is checked to be in the non-locked state. */
+UNIV_INTERN
+void
+rw_lock_free_func(
+/*==============*/
+	rw_lock_t*	lock);	/*!< in: rw-lock */
+#ifdef UNIV_DEBUG
+/******************************************************************//**
+Checks that the rw-lock has been initialized and that there are no
+simultaneous shared and exclusive locks.
+@return	TRUE */
+UNIV_INTERN
+ibool
+rw_lock_validate(
+/*=============*/
+	rw_lock_t*	lock);	/*!< in: rw-lock */
+#endif /* UNIV_DEBUG */
+/******************************************************************//**
+Low-level function which tries to lock an rw-lock in s-mode. Performs no
+spinning.
+@return	TRUE if success */
+UNIV_INLINE
+ibool
+rw_lock_s_lock_low(
+/*===============*/
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	ulint		pass __attribute__((unused)),
+				/*!< in: pass value; != 0, if the lock will be
+				passed to another thread to unlock */
+	const char*	file_name, /*!< in: file name where lock requested */
+	ulint		line);	/*!< in: line where requested */
+/******************************************************************//**
+NOTE! Use the corresponding macro, not directly this function, except if
+you supply the file name and line number. Lock an rw-lock in shared mode
+for the current thread. If the rw-lock is locked in exclusive mode, or
+there is an exclusive lock request waiting, the function spins a preset
+time (controlled by SYNC_SPIN_ROUNDS), waiting for the lock, before
+suspending the thread. */
+UNIV_INLINE
+void
+rw_lock_s_lock_func(
+/*================*/
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	ulint		pass,	/*!< in: pass value; != 0, if the lock will
+				be passed to another thread to unlock */
+	const char*	file_name,/*!< in: file name where lock requested */
+	ulint		line);	/*!< in: line where requested */
+/******************************************************************//**
+NOTE! Use the corresponding macro, not directly this function! Lock an
+rw-lock in exclusive mode for the current thread if the lock can be
+obtained immediately.
+@return	TRUE if success */
+UNIV_INLINE
+ibool
+rw_lock_x_lock_func_nowait(
+/*=======================*/
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	const char*	file_name,/*!< in: file name where lock requested */
+	ulint		line);	/*!< in: line where requested */
+/******************************************************************//**
+Releases a shared mode lock. */
+UNIV_INLINE
+void
+rw_lock_s_unlock_func(
+/*==================*/
+#ifdef UNIV_SYNC_DEBUG
+	ulint		pass,	/*!< in: pass value; != 0, if the lock may have
+				been passed to another thread to unlock */
+#endif
+	rw_lock_t*	lock);	/*!< in/out: rw-lock */
+
+/******************************************************************//**
+NOTE! Use the corresponding macro, not directly this function! Lock an
+rw-lock in exclusive mode for the current thread. If the rw-lock is locked
+in shared or exclusive mode, or there is an exclusive lock request waiting,
+the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
+for the lock, before suspending the thread. If the same thread has an x-lock
+on the rw-lock, locking succeeds, with the following exception: if pass != 0,
+only a single x-lock may be taken on the lock. NOTE: If the same thread has
+an s-lock, locking does not succeed! */
+UNIV_INTERN
+void
+rw_lock_x_lock_func(
+/*================*/
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	ulint		pass,	/*!< in: pass value; != 0, if the lock will
+				be passed to another thread to unlock */
+	const char*	file_name,/*!< in: file name where lock requested */
+	ulint		line);	/*!< in: line where requested */
+/******************************************************************//**
+Releases an exclusive mode lock. */
+UNIV_INLINE
+void
+rw_lock_x_unlock_func(
+/*==================*/
+#ifdef UNIV_SYNC_DEBUG
+	ulint		pass,	/*!< in: pass value; != 0, if the lock may have
+				been passed to another thread to unlock */
+#endif
+	rw_lock_t*	lock);	/*!< in/out: rw-lock */
+/******************************************************************//**
+This function is used in the insert buffer to move the ownership of an
+x-latch on a buffer frame to the current thread. The x-latch was set by
+the buffer read operation and it protected the buffer frame while the
+read was done. The ownership is moved because we want that the current
+thread is able to acquire a second x-latch which is stored in an mtr.
+This, in turn, is needed to pass the debug checks of index page
+operations. */
+UNIV_INTERN
+void
+rw_lock_x_lock_move_ownership(
+/*==========================*/
+	rw_lock_t*	lock);	/*!< in: lock which was x-locked in the
+				buffer read */
+/******************************************************************//**
+Returns the value of writer_count for the lock. Does not reserve the lock
+mutex, so the caller must be sure it is not changed during the call.
+@return	value of writer_count */
+UNIV_INLINE
+ulint
+rw_lock_get_x_lock_count(
+/*=====================*/
+	const rw_lock_t*	lock);	/*!< in: rw-lock */
+/********************************************************************//**
+Check if there are threads waiting for the rw-lock.
+@return	1 if waiters, 0 otherwise */
+UNIV_INLINE
+ulint
+rw_lock_get_waiters(
+/*================*/
+	const rw_lock_t*	lock);	/*!< in: rw-lock */
+/******************************************************************//**
+Returns the write-status of the lock - this function made more sense
+with the old rw_lock implementation.
+@return	RW_LOCK_NOT_LOCKED, RW_LOCK_EX, RW_LOCK_WAIT_EX */
+UNIV_INLINE
+ulint
+rw_lock_get_writer(
+/*===============*/
+	const rw_lock_t*	lock);	/*!< in: rw-lock */
+/******************************************************************//**
+Returns the number of readers.
+@return	number of readers */
+UNIV_INLINE
+ulint
+rw_lock_get_reader_count(
+/*=====================*/
+	const rw_lock_t*	lock);	/*!< in: rw-lock */
+/******************************************************************//**
+Decrements lock_word the specified amount if it is greater than 0.
+This is used by both s_lock and x_lock operations.
+@return	TRUE if decr occurs */
+UNIV_INLINE
+ibool
+rw_lock_lock_word_decr(
+/*===================*/
+	rw_lock_t*	lock,		/*!< in/out: rw-lock */
+	ulint		amount);	/*!< in: amount to decrement */
+/******************************************************************//**
+Increments lock_word the specified amount and returns new value.
+@return	lock->lock_word after increment */
+UNIV_INLINE
+lint
+rw_lock_lock_word_incr(
+/*===================*/
+	rw_lock_t*	lock,		/*!< in/out: rw-lock */
+	ulint		amount);	/*!< in: amount to increment */
+/******************************************************************//**
+This function sets the lock->writer_thread and lock->recursive fields.
+For platforms where we are using atomic builtins instead of lock->mutex
+it sets the lock->writer_thread field using atomics to ensure memory
+ordering. Note that it is assumed that the caller of this function
+effectively owns the lock i.e.: nobody else is allowed to modify
+lock->writer_thread at this point in time.
+The protocol is that lock->writer_thread MUST be updated BEFORE the
+lock->recursive flag is set. */
+UNIV_INLINE
+void
+rw_lock_set_writer_id_and_recursion_flag(
+/*=====================================*/
+	rw_lock_t*	lock,		/*!< in/out: lock to work on */
+	ibool		recursive);	/*!< in: TRUE if recursion
+					allowed */
+#ifdef UNIV_SYNC_DEBUG
+/******************************************************************//**
+Checks if the thread has locked the rw-lock in the specified mode, with
+the pass value == 0. */
+UNIV_INTERN
+ibool
+rw_lock_own(
+/*========*/
+	rw_lock_t*	lock,		/*!< in: rw-lock */
+	ulint		lock_type)	/*!< in: lock type: RW_LOCK_SHARED,
+					RW_LOCK_EX */
+	__attribute__((warn_unused_result));
+#endif /* UNIV_SYNC_DEBUG */
+/******************************************************************//**
+Checks if somebody has locked the rw-lock in the specified mode. */
+UNIV_INTERN
+ibool
+rw_lock_is_locked(
+/*==============*/
+	rw_lock_t*	lock,		/*!< in: rw-lock */
+	ulint		lock_type);	/*!< in: lock type: RW_LOCK_SHARED,
+					RW_LOCK_EX */
+#ifdef UNIV_SYNC_DEBUG
+/***************************************************************//**
+Prints debug info of an rw-lock. */
+UNIV_INTERN
+void
+rw_lock_print(
+/*==========*/
+	rw_lock_t*	lock);	/*!< in: rw-lock */
+/***************************************************************//**
+Prints debug info of currently locked rw-locks. */
+UNIV_INTERN
+void
+rw_lock_list_print_info(
+/*====================*/
+	FILE*	file);		/*!< in: file where to print */
+/***************************************************************//**
+Returns the number of currently locked rw-locks.
+Works only in the debug version.
+@return	number of locked rw-locks */
+UNIV_INTERN
+ulint
+rw_lock_n_locked(void);
+/*==================*/
+
+/*#####################################################################*/
+
+/******************************************************************//**
+Acquires the debug mutex. We cannot use the mutex defined in sync0sync,
+because the debug mutex is also acquired in sync0arr while holding the OS
+mutex protecting the sync array, and the ordinary mutex_enter might
+recursively call routines in sync0arr, leading to a deadlock on the OS
+mutex. */
+UNIV_INTERN
+void
+rw_lock_debug_mutex_enter(void);
+/*===========================*/
+/******************************************************************//**
+Releases the debug mutex. */
+UNIV_INTERN
+void
+rw_lock_debug_mutex_exit(void);
+/*==========================*/
+/*********************************************************************//**
+Prints info of a debug struct. */
+UNIV_INTERN
+void
+rw_lock_debug_print(
+/*================*/
+	FILE*			f,	/*!< in: output stream */
+	rw_lock_debug_t*	info);	/*!< in: debug struct */
+#endif /* UNIV_SYNC_DEBUG */
+
+/* NOTE! The structure appears here only for the compiler to know its size.
+Do not use its fields directly! */
+
+/** The structure used in the spin lock implementation of a read-write
+lock. Several threads may have a shared lock simultaneously in this
+lock, but only one writer may have an exclusive lock, in which case no
+shared locks are allowed. To prevent starving of a writer blocked by
+readers, a writer may queue for x-lock by decrementing lock_word: no
+new readers will be let in while the thread waits for readers to
+exit. */
+struct rw_lock_t {
+	volatile lint	lock_word;
+				/*!< Holds the state of the lock. */
+	volatile ulint	waiters;/*!< 1: there are waiters */
+	volatile ibool	recursive;/*!< Default value FALSE which means the lock
+				is non-recursive. The value is typically set
+				to TRUE making normal rw_locks recursive. In
+				case of asynchronous IO, when a non-zero
+				value of 'pass' is passed then we keep the
+				lock non-recursive.
+				This flag also tells us about the state of
+				writer_thread field. If this flag is set
+				then writer_thread MUST contain the thread
+				id of the current x-holder or wait-x thread.
+				This flag must be reset in x_unlock
+				functions before incrementing the lock_word */
+	volatile os_thread_id_t	writer_thread;
+				/*!< Thread id of writer thread. Is only
+				guaranteed to have sane and non-stale
+				value iff recursive flag is set. */
+	os_event_t	event;	/*!< Used by sync0arr.cc for thread queueing */
+	os_event_t	wait_ex_event;
+				/*!< Event for next-writer to wait on. A thread
+				must decrement lock_word before waiting. */
+	/* The mutex member exists only when INNODB_RW_LOCKS_USE_ATOMICS
+	is not defined. */
+#ifndef INNODB_RW_LOCKS_USE_ATOMICS
+	ib_mutex_t	mutex;		/*!< The mutex protecting rw_lock_t */
+#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+
+	UT_LIST_NODE_T(rw_lock_t) list;
+				/*!< All allocated rw locks are put into a
+				list */
+	/* debug_list and level exist only under UNIV_SYNC_DEBUG. */
+#ifdef UNIV_SYNC_DEBUG
+	UT_LIST_BASE_NODE_T(rw_lock_debug_t) debug_list;
+				/*!< In the debug version: pointer to the debug
+				info list of the lock */
+	ulint	level;		/*!< Level in the global latching order. */
+#endif /* UNIV_SYNC_DEBUG */
+	/* pfs_psi exists only under UNIV_PFS_RWLOCK. */
+#ifdef UNIV_PFS_RWLOCK
+	struct PSI_rwlock *pfs_psi;/*!< The instrumentation hook */
+#endif
+	ulint count_os_wait;	/*!< Count of os_waits. May not be accurate */
+	const char*	cfile_name;/*!< File name where lock created */
+	/* last s-lock file/line is not guaranteed to be correct */
+	const char*	last_s_file_name;/*!< File name where last s-locked */
+	const char*	last_x_file_name;/*!< File name where last x-locked */
+	ibool		writer_is_wait_ex;
+				/*!< This is TRUE if the writer field is
+				RW_LOCK_WAIT_EX; this field is located far
+				from the memory update hotspot fields which
+				are at the start of this struct, thus we can
+				peek this field without causing much memory
+				bus traffic */
+	/* The three line numbers below are 14-bit unsigned bit-fields, so
+	each can represent the values 0..16383 only. */
+	unsigned	cline:14;	/*!< Line where created */
+	unsigned	last_s_line:14;	/*!< Line number where last time s-locked */
+	unsigned	last_x_line:14;	/*!< Line number where last time x-locked */
+	/* magic_n and its expected value RW_LOCK_MAGIC_N exist only under
+	UNIV_DEBUG. */
+#ifdef UNIV_DEBUG
+	ulint	magic_n;	/*!< RW_LOCK_MAGIC_N */
+/** Value of rw_lock_t::magic_n */
+#define	RW_LOCK_MAGIC_N	22643
+#endif /* UNIV_DEBUG */
+};
+
+#ifdef UNIV_SYNC_DEBUG
+/** The structure for storing debug info of an rw-lock.  All access to this
+structure must be protected by rw_lock_debug_mutex_enter(). */
+struct	rw_lock_debug_t {
+
+	os_thread_id_t thread_id;  /*!< The thread id of the thread which
+				locked the rw-lock */
+	ulint	pass;		/*!< Pass value given in the lock operation */
+	ulint	lock_type;	/*!< Type of the lock: RW_LOCK_EX,
+				RW_LOCK_SHARED, RW_LOCK_WAIT_EX */
+	const char*	file_name;/*!< File name where the lock was obtained */
+	ulint	line;		/*!< Line where the rw-lock was locked */
+	UT_LIST_NODE_T(rw_lock_debug_t) list;
+				/*!< Debug structs are linked in a two-way
+				list; rw_lock_t::debug_list is declared as
+				the base node of a list of this type */
+};
+#endif /* UNIV_SYNC_DEBUG */
+
+/* For performance schema instrumentation, a separate set of rw-lock
+wrapper functions is created if "UNIV_PFS_RWLOCK" is defined.
+The instrumentation is not planted directly into the original
+functions, so that the underlying functions stay as they are.
+If a user wants to "take out" some rw-lock from instrumentation
+even though performance schema (UNIV_PFS_RWLOCK) is defined, they
+can do so by reinstating APIs that link directly to the original
+underlying functions.
+The instrumented function names have the prefix "pfs_rw_lock_" instead
+of the original prefix "rw_lock_". The following is the list of
+functions that have been instrumented:
+
+rw_lock_create()
+rw_lock_x_lock()
+rw_lock_x_lock_gen()
+rw_lock_x_lock_nowait()
+rw_lock_x_unlock_gen()
+rw_lock_s_lock()
+rw_lock_s_lock_gen()
+rw_lock_s_lock_nowait()
+rw_lock_s_unlock_gen()
+rw_lock_free()
+*/
+
+#ifdef UNIV_PFS_RWLOCK
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_create_func()
+NOTE! Please use the corresponding macro rw_lock_create(), not
+directly this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_create_func(
+/*====================*/
+	PSI_rwlock_key  key,		/*!< in: key registered with
+					performance schema */
+	rw_lock_t*	lock,		/*!< in: rw lock */
+#ifdef UNIV_DEBUG
+# ifdef UNIV_SYNC_DEBUG
+	ulint		level,		/*!< in: level */
+# endif /* UNIV_SYNC_DEBUG */
+	const char*	cmutex_name,	/*!< in: mutex name */
+#endif /* UNIV_DEBUG */
+	const char*	cfile_name,	/*!< in: file name where created */
+	ulint		cline);		/*!< in: file line where created */
+
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_x_lock_func()
+NOTE! Please use the corresponding macro rw_lock_x_lock(), not
+directly this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_x_lock_func(
+/*====================*/
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	ulint		pass,	/*!< in: pass value; != 0, if the lock will
+				be passed to another thread to unlock */
+	const char*	file_name,/*!< in: file name where lock requested */
+	ulint		line);	/*!< in: line where requested */
+/******************************************************************//**
+Performance schema instrumented wrap function for
+rw_lock_x_lock_func_nowait()
+NOTE! Please use the corresponding macro, not directly this function!
+@return TRUE if success */
+UNIV_INLINE
+ibool
+pfs_rw_lock_x_lock_func_nowait(
+/*===========================*/
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	const char*	file_name,/*!< in: file name where lock requested */
+	ulint		line);	/*!< in: line where requested */
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_s_lock_func()
+NOTE! Please use the corresponding macro rw_lock_s_lock(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_s_lock_func(
+/*====================*/
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	ulint		pass,	/*!< in: pass value; != 0, if the lock will
+				be passed to another thread to unlock */
+	const char*	file_name,/*!< in: file name where lock requested */
+	ulint		line);	/*!< in: line where requested */
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_s_lock_low()
+NOTE! Please use the corresponding macro rw_lock_s_lock_nowait() or
+rw_lock_s_lock_gen_nowait(), not directly this function!
+@return TRUE if success */
+UNIV_INLINE
+ibool
+pfs_rw_lock_s_lock_low(
+/*===================*/
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	ulint		pass,	/*!< in: pass value; != 0, if the
+				lock will be passed to another
+				thread to unlock */
+	const char*	file_name, /*!< in: file name where lock requested */
+	ulint		line);	/*!< in: line where requested */
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_x_lock_func()
+NOTE! Please use the corresponding macro rw_lock_x_lock(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_x_lock_func(
+/*====================*/
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	ulint		pass,	/*!< in: pass value; != 0, if the lock will
+				be passed to another thread to unlock */
+	const char*	file_name,/*!< in: file name where lock requested */
+	ulint		line);	/*!< in: line where requested */
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_s_unlock_func()
+NOTE! Please use the corresponding macro rw_lock_s_unlock(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_s_unlock_func(
+/*======================*/
+#ifdef UNIV_SYNC_DEBUG
+	ulint		pass,	/*!< in: pass value; != 0, if the
+				lock may have been passed to another
+				thread to unlock */
+#endif
+	rw_lock_t*	lock);	/*!< in/out: rw-lock */
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_x_unlock_func()
+NOTE! Please use the corresponding macro rw_lock_x_unlock(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_x_unlock_func(
+/*======================*/
+#ifdef UNIV_SYNC_DEBUG
+	ulint		pass,	/*!< in: pass value; != 0, if the
+				lock may have been passed to another
+				thread to unlock */
+#endif
+	rw_lock_t*	lock);	/*!< in/out: rw-lock */
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_free_func()
+NOTE! Please use the corresponding macro rw_lock_free(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_free_func(
+/*==================*/
+	rw_lock_t*	lock);	/*!< in: rw-lock */
+#endif  /* UNIV_PFS_RWLOCK */
+
+
+#ifndef UNIV_NONINL
+#include "sync0rw.ic"
+#endif
+#endif /* !UNIV_HOTBACKUP */
+
+#endif
diff --git a/storage/innobase/include/sync0rw.ic b/storage/innobase/include/sync0rw.ic
new file mode 100644
index 00000000000..bb05ae7daf1
--- /dev/null
+++ b/storage/innobase/include/sync0rw.ic
@@ -0,0 +1,797 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/sync0rw.ic
+The read-write lock (for threads)
+
+Created 9/11/1995 Heikki Tuuri
+*******************************************************/
+
+/******************************************************************//**
+Lock an rw-lock in shared mode for the current thread. If the rw-lock is
+locked in exclusive mode, or there is an exclusive lock request waiting,
+the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
+for the lock before suspending the thread. Used by rw_lock_s_lock_func(). */
+UNIV_INTERN
+void
+rw_lock_s_lock_spin(
+/*================*/
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	ulint		pass,	/*!< in: pass value; != 0, if the lock will
+				be passed to another thread to unlock */
+	const char*	file_name,/*!< in: file name where lock requested */
+	ulint		line);	/*!< in: line where requested */
+#ifdef UNIV_SYNC_DEBUG
+/******************************************************************//**
+Inserts the debug information for an rw-lock (UNIV_SYNC_DEBUG only). */
+UNIV_INTERN
+void
+rw_lock_add_debug_info(
+/*===================*/
+	rw_lock_t*	lock,		/*!< in: rw-lock */
+	ulint		pass,		/*!< in: pass value */
+	ulint		lock_type,	/*!< in: lock type, e.g. RW_LOCK_EX */
+	const char*	file_name,	/*!< in: file where requested */
+	ulint		line);		/*!< in: line where requested */
+/******************************************************************//**
+Removes a debug information struct for an rw-lock (UNIV_SYNC_DEBUG only). */
+UNIV_INTERN
+void
+rw_lock_remove_debug_info(
+/*======================*/
+	rw_lock_t*	lock,		/*!< in: rw-lock */
+	ulint		pass,		/*!< in: pass value */
+	ulint		lock_type);	/*!< in: lock type, e.g. RW_LOCK_EX */
+#endif /* UNIV_SYNC_DEBUG */
+
+/********************************************************************//**
+Check if there are threads waiting for the rw-lock (unsynchronized read).
+@return	lock->waiters: 1 if waiters, 0 otherwise */
+UNIV_INLINE
+ulint
+rw_lock_get_waiters(
+/*================*/
+	const rw_lock_t*	lock)	/*!< in: rw-lock */
+{
+	return(lock->waiters);
+}
+
+/********************************************************************//**
+Sets lock->waiters to 1. It is not an error if lock->waiters is already
+1. With INNODB_RW_LOCKS_USE_ATOMICS a compare-and-swap (result ignored)
+is used; otherwise a plain store is followed by os_wmb. */
+UNIV_INLINE
+void
+rw_lock_set_waiter_flag(
+/*====================*/
+	rw_lock_t*	lock)	/*!< in/out: rw-lock */
+{
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+	(void) os_compare_and_swap_ulint(&lock->waiters, 0, 1); /* 0 -> 1 */
+#else /* INNODB_RW_LOCKS_USE_ATOMICS */
+	lock->waiters = 1;
+	os_wmb;
+#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+}
+
+/********************************************************************//**
+Resets lock->waiters to 0. It is not an error if lock->waiters is already
+0. With INNODB_RW_LOCKS_USE_ATOMICS a compare-and-swap (result ignored)
+is used; otherwise a plain store is followed by os_wmb. */
+UNIV_INLINE
+void
+rw_lock_reset_waiter_flag(
+/*======================*/
+	rw_lock_t*	lock)	/*!< in/out: rw-lock */
+{
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+	(void) os_compare_and_swap_ulint(&lock->waiters, 1, 0); /* 1 -> 0 */
+#else /* INNODB_RW_LOCKS_USE_ATOMICS */
+	lock->waiters = 0;
+	os_wmb;
+#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+}
+
+/******************************************************************//**
+Returns the write-status of the lock, derived from one read of lock_word -
+this function made more sense with the old rw_lock implementation.
+@return	RW_LOCK_NOT_LOCKED, RW_LOCK_EX, RW_LOCK_WAIT_EX */
+UNIV_INLINE
+ulint
+rw_lock_get_writer(
+/*===============*/
+	const rw_lock_t*	lock)	/*!< in: rw-lock */
+{
+	const lint	word = lock->lock_word;
+	if (word > 0) {
+		/* Unlocked or only s-locked: reported as NOT_LOCKED, as
+		the writer member of the old implementation did. */
+		return(RW_LOCK_NOT_LOCKED);
+	}
+	if (word != 0 && word > -X_LOCK_DECR) {
+		/* -X_LOCK_DECR < word < 0 */
+		return(RW_LOCK_WAIT_EX);
+	}
+	return(RW_LOCK_EX);	/* word == 0 or word <= -X_LOCK_DECR */
+}
+
+/******************************************************************//**
+Returns the number of readers, decoded from one read of lock_word.
+@return	number of readers */
+UNIV_INLINE
+ulint
+rw_lock_get_reader_count(
+/*=====================*/
+	const rw_lock_t*	lock)	/*!< in: rw-lock */
+{
+	const lint	word = lock->lock_word;
+
+	if (word == 0 || word <= -X_LOCK_DECR) {
+		return(0);	/* no readers are encoded in this range */
+	} else if (word > 0) {
+		return(X_LOCK_DECR - word);	/* s-locked, no x-waiters */
+	}
+	/* -X_LOCK_DECR < word < 0: s-locked, with x-waiters */
+	return((ulint)(-word));
+}
+
+#ifndef INNODB_RW_LOCKS_USE_ATOMICS /* accessor for lock->mutex */
+UNIV_INLINE
+ib_mutex_t*
+rw_lock_get_mutex(
+/*==============*/
+	rw_lock_t*	lock)	/*!< in: rw-lock */
+{
+	return(&(lock->mutex));	/* address of the mutex member */
+}
+#endif /* !INNODB_RW_LOCKS_USE_ATOMICS */
+
+/******************************************************************//**
+Returns the number of x-locks (writer_count) encoded in lock_word. Takes
+no mutex, so the caller must be sure it is not changed during the call.
+@return	0 if not x-locked, otherwise the recursive x-lock count */
+UNIV_INLINE
+ulint
+rw_lock_get_x_lock_count(
+/*=====================*/
+	const rw_lock_t*	lock)	/*!< in: rw-lock */
+{
+	const lint	word = lock->lock_word;
+	if (word == 0) {
+		return(1);	/* x-locked exactly once */
+	}
+	return(word > -X_LOCK_DECR ? 0 : 2 - (word + X_LOCK_DECR));
+}
+
+/******************************************************************//**
+Two different implementations for decrementing the lock_word of a rw_lock:
+one for systems supporting atomic operations, one for others. This does
+not support recursive x-locks: they should be handled by the caller and
+need not be atomic since they are performed by the current lock holder.
+The decrement is made only while lock_word is observed to be > 0.
+@return	TRUE if decr occurs */
+UNIV_INLINE
+ibool
+rw_lock_lock_word_decr(
+/*===================*/
+	rw_lock_t*	lock,		/*!< in/out: rw-lock */
+	ulint		amount)		/*!< in: amount to decrement */
+{
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+	lint local_lock_word;
+
+	os_rmb;
+	local_lock_word = lock->lock_word;
+	while (local_lock_word > 0) {
+		if (os_compare_and_swap_lint(&lock->lock_word,
+					     local_lock_word,
+					     local_lock_word - amount)) {
+			return(TRUE);
+		}
+		local_lock_word = lock->lock_word; /* CAS failed: re-read */
+	}
+	return(FALSE);
+#else /* INNODB_RW_LOCKS_USE_ATOMICS */
+	ibool success = FALSE;
+	mutex_enter(&(lock->mutex));
+	if (lock->lock_word > 0) {
+		lock->lock_word -= amount;
+		success = TRUE;
+	}
+	mutex_exit(&(lock->mutex));
+	return(success);
+#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+}
+
+/******************************************************************//**
+Adds amount to lock_word, with an atomic op or under lock->mutex.
+@return	lock->lock_word after increment */
+UNIV_INLINE
+lint
+rw_lock_lock_word_incr(
+/*===================*/
+	rw_lock_t*	lock,		/*!< in/out: rw-lock */
+	ulint		amount)		/*!< in: amount of increment */
+{
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+	return(os_atomic_increment_lint(&lock->lock_word, amount));
+#else /* INNODB_RW_LOCKS_USE_ATOMICS */
+	lint local_lock_word;
+
+	mutex_enter(&(lock->mutex));
+
+	lock->lock_word += amount;
+	local_lock_word = lock->lock_word; /* copy while mutex is held */
+
+	mutex_exit(&(lock->mutex));
+
+	return(local_lock_word);
+#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+}
+
+/******************************************************************//**
+This function sets the lock->writer_thread and lock->recursive fields.
+For platforms where we are using atomic builtins instead of lock->mutex
+it sets the lock->writer_thread field using atomics to ensure memory
+ordering. Note that it is assumed that the caller of this function
+effectively owns the lock, i.e. nobody else is allowed to modify
+lock->writer_thread at this point in time.
+The protocol is that lock->writer_thread MUST be updated BEFORE the
+lock->recursive flag is set. */
+UNIV_INLINE
+void
+rw_lock_set_writer_id_and_recursion_flag(
+/*=====================================*/
+	rw_lock_t*	lock,		/*!< in/out: lock to work on */
+	ibool		recursive)	/*!< in: TRUE if recursion
+					allowed */
+{
+	os_thread_id_t	curr_thread	= os_thread_get_curr_id();
+
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+	os_thread_id_t	local_thread;
+	ibool		success;
+
+	/* Prevent Valgrind warnings about writer_thread being
+	uninitialized.  It does not matter if writer_thread is
+	uninitialized, because we are comparing writer_thread against
+	itself, and the operation should always succeed. */
+	UNIV_MEM_VALID(&lock->writer_thread, sizeof lock->writer_thread);
+
+	local_thread = lock->writer_thread;
+	success = os_compare_and_swap_thread_id(
+		&lock->writer_thread, local_thread, curr_thread);
+	ut_a(success);
+	lock->recursive = recursive;	/* only after writer_thread */
+
+#else /* INNODB_RW_LOCKS_USE_ATOMICS */
+
+	mutex_enter(&lock->mutex);
+	lock->writer_thread = curr_thread;
+	lock->recursive = recursive;	/* only after writer_thread */
+	mutex_exit(&lock->mutex);
+
+#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+}
+
+/******************************************************************//**
+Low-level function which makes one attempt to lock an rw-lock in s-mode
+by decrementing lock_word by 1. Performs no spinning.
+@return	TRUE if success */
+UNIV_INLINE
+ibool
+rw_lock_s_lock_low(
+/*===============*/
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	ulint		pass __attribute__((unused)),
+				/*!< in: pass value; != 0, if the lock will be
+				passed to another thread to unlock */
+	const char*	file_name, /*!< in: file name where lock requested */
+	ulint		line)	/*!< in: line where requested */
+{
+	if (rw_lock_lock_word_decr(lock, 1)) {
+#ifdef UNIV_SYNC_DEBUG
+		rw_lock_add_debug_info(
+			lock, pass, RW_LOCK_SHARED, file_name, line);
+#endif
+		/* Debugging aids only, written without synchronization:
+		they may be incorrect, or pair a line with a file name it
+		does not belong to. */
+		lock->last_s_file_name = file_name;
+		lock->last_s_line = line;
+		return(TRUE);	/* locking succeeded */
+	}
+
+	return(FALSE);	/* locking did not succeed */
+}
+
+/******************************************************************//**
+NOTE! Use the corresponding macro, not directly this function! Takes an
+s-lock on an rw-lock for the current thread. One immediate attempt is made
+first; if it fails, rw_lock_s_lock_spin() takes over, which spins a preset
+time (controlled by SYNC_SPIN_ROUNDS) waiting for the lock before
+suspending the thread. */
+UNIV_INLINE
+void
+rw_lock_s_lock_func(
+/*================*/
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	ulint		pass,	/*!< in: pass value; != 0, if the lock will
+				be passed to another thread to unlock */
+	const char*	file_name,/*!< in: file name where lock requested */
+	ulint		line)	/*!< in: line where requested */
+{
+	/* NOTE: The ids of the threads that hold an s-lock on a latch are
+	not known, and s-lock requests are served only after the waiting
+	x-lock requests have been fulfilled. A thread that already owns
+	an s-lock here and requests another one may therefore end up in
+	a deadlock with another thread which requests an x-lock here.
+	For this reason recursive s-locking of a latch is forbidden:
+	the assertions below warn the programmer of the possibility of
+	this kind of a deadlock. If we want to implement safe recursive
+	s-locking, we should keep in a list the thread ids of the
+	threads which have s-locked a latch. This would use some CPU
+	time. */
+
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); /* see NOTE above */
+	ut_ad(!rw_lock_own(lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+	/* Fast path: a single attempt that performs no spinning. Only
+	when it fails is the work handed over to rw_lock_s_lock_spin(),
+	which spins and, if necessary, suspends the thread. */
+	if (!rw_lock_s_lock_low(lock, pass, file_name, line)) {
+
+		/* Did not succeed, try spin wait */
+
+		rw_lock_s_lock_spin(lock, pass, file_name, line);
+	}
+}
+
+/******************************************************************//**
+NOTE! Use the corresponding macro, not directly this function! Lock an
+rw-lock in exclusive mode for the current thread if the lock can be
+obtained immediately, or re-lock it if this thread already holds it.
+@return	TRUE if success */
+UNIV_INLINE
+ibool
+rw_lock_x_lock_func_nowait(
+/*=======================*/
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	const char*	file_name,/*!< in: file name where lock requested */
+	ulint		line)	/*!< in: line where requested */
+{
+	ibool success;
+
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+	success = os_compare_and_swap_lint(&lock->lock_word, X_LOCK_DECR, 0);
+#else /* INNODB_RW_LOCKS_USE_ATOMICS */
+
+	success = FALSE;
+	mutex_enter(&(lock->mutex));
+	if (lock->lock_word == X_LOCK_DECR) {
+		lock->lock_word = 0;
+		success = TRUE;
+	}
+	mutex_exit(&(lock->mutex));
+
+#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+	if (success) {
+		rw_lock_set_writer_id_and_recursion_flag(lock, TRUE);
+
+	} else if (lock->recursive
+		   && os_thread_eq(lock->writer_thread,
+				   os_thread_get_curr_id())) {
+		/* Relock: this lock_word modification is safe since no other
+		threads can modify (lock, unlock, or reserve) lock_word while
+		there is an exclusive writer and this is the writer thread. */
+		if (lock->lock_word == 0) {
+			lock->lock_word = -X_LOCK_DECR;	/* second x-lock */
+		} else {
+			lock->lock_word--;	/* third or later x-lock */
+		}
+
+		/* Watch for too many recursive locks */
+		ut_ad(lock->lock_word < 0);
+
+	} else {
+		/* Failure: lock not free and no recursive relock possible */
+		return(FALSE);
+	}
+#ifdef UNIV_SYNC_DEBUG
+	rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
+#endif
+
+	lock->last_x_file_name = file_name;
+	lock->last_x_line = line;
+
+	ut_ad(rw_lock_validate(lock));
+
+	return(TRUE);
+}
+
+/******************************************************************//**
+Releases a shared mode lock; wakes the wait_ex waiter if lock_word hits 0. */
+UNIV_INLINE
+void
+rw_lock_s_unlock_func(
+/*==================*/
+#ifdef UNIV_SYNC_DEBUG
+	ulint		pass,	/*!< in: pass value; != 0, if the lock may have
+				been passed to another thread to unlock */
+#endif
+	rw_lock_t*	lock)	/*!< in/out: rw-lock */
+{
+	ut_ad(lock->lock_word > -X_LOCK_DECR);
+	ut_ad(lock->lock_word != 0);
+	ut_ad(lock->lock_word < X_LOCK_DECR);
+
+#ifdef UNIV_SYNC_DEBUG
+	rw_lock_remove_debug_info(lock, pass, RW_LOCK_SHARED);
+#endif
+
+	/* Increment lock_word to indicate 1 less reader */
+	if (rw_lock_lock_word_incr(lock, 1) == 0) {
+
+		/* wait_ex waiter exists. It may not be asleep, but we signal
+		anyway. We do not wake other waiters, because they can't
+		exist without wait_ex waiter and wait_ex waiter goes first.*/
+		os_event_set(lock->wait_ex_event);
+		sync_array_object_signalled();
+
+	}
+
+	ut_ad(rw_lock_validate(lock));
+
+#ifdef UNIV_SYNC_PERF_STAT
+	rw_s_exit_count++;
+#endif /* UNIV_SYNC_PERF_STAT */
+}
+
+/******************************************************************//**
+Releases one level of an exclusive mode lock; wakes waiters once free. */
+UNIV_INLINE
+void
+rw_lock_x_unlock_func(
+/*==================*/
+#ifdef UNIV_SYNC_DEBUG
+	ulint		pass,	/*!< in: pass value; != 0, if the lock may have
+				been passed to another thread to unlock */
+#endif
+	rw_lock_t*	lock)	/*!< in/out: rw-lock */
+{
+	ut_ad(lock->lock_word == 0 || lock->lock_word <= -X_LOCK_DECR);
+
+	/* lock->recursive flag also indicates if lock->writer_thread is
+	valid or stale. If we are the last of the recursive callers
+	then we must unset lock->recursive flag to indicate that the
+	lock->writer_thread is now stale.
+	Note that since we still hold the x-lock we can safely read the
+	lock_word. */
+	if (lock->lock_word == 0) {
+		/* Last caller in a possible recursive chain. */
+		lock->recursive = FALSE;
+	}
+
+#ifdef UNIV_SYNC_DEBUG
+	rw_lock_remove_debug_info(lock, pass, RW_LOCK_EX);
+#endif
+
+	ulint x_lock_incr;
+	if (lock->lock_word == 0) {
+		x_lock_incr = X_LOCK_DECR;	/* 1 x-lock -> free */
+	} else if (lock->lock_word == -X_LOCK_DECR) {
+		x_lock_incr = X_LOCK_DECR;	/* 2 x-locks -> 1 */
+	} else {
+		ut_ad(lock->lock_word < -X_LOCK_DECR);
+		x_lock_incr = 1;	/* undo one further relock */
+	}
+
+	if (rw_lock_lock_word_incr(lock, x_lock_incr) == X_LOCK_DECR) {
+		/* Lock is now free. May have to signal read/write waiters.
+		We do not need to signal wait_ex waiters, since they cannot
+		exist when there is a writer. */
+		if (lock->waiters) {
+			rw_lock_reset_waiter_flag(lock);
+			os_event_set(lock->event);
+			sync_array_object_signalled();
+		}
+	}
+
+	ut_ad(rw_lock_validate(lock));
+
+#ifdef UNIV_SYNC_PERF_STAT
+	rw_x_exit_count++;
+#endif /* UNIV_SYNC_PERF_STAT */
+}
+
+#ifdef UNIV_PFS_RWLOCK
+
+/******************************************************************//**
+Performance schema instrumented wrapper for rw_lock_create_func(): sets
+lock->pfs_psi first. NOTE! Please use the corresponding macro
+rw_lock_create(), not directly this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_create_func(
+/*====================*/
+	mysql_pfs_key_t	key,		/*!< in: key registered with
+					performance schema */
+	rw_lock_t*	lock,		/*!< in/out: pointer to memory */
+# ifdef UNIV_DEBUG
+#  ifdef UNIV_SYNC_DEBUG
+	ulint		level,		/*!< in: level */
+#  endif /* UNIV_SYNC_DEBUG */
+	const char*	cmutex_name,	/*!< in: mutex name */
+# endif /* UNIV_DEBUG */
+	const char*	cfile_name,	/*!< in: file name where created */
+	ulint		cline)		/*!< in: file line where created */
+{
+	/* Initialize the rwlock for performance schema */
+	lock->pfs_psi = PSI_RWLOCK_CALL(init_rwlock)(key, lock);
+
+	/* The actual function to initialize an rwlock; the debug-only
+	arguments are forwarded under the same #ifdefs as declared above */
+	rw_lock_create_func(lock,
+# ifdef UNIV_DEBUG
+#  ifdef UNIV_SYNC_DEBUG
+			    level,
+#  endif /* UNIV_SYNC_DEBUG */
+			    cmutex_name,
+# endif /* UNIV_DEBUG */
+			    cfile_name,
+			    cline);
+}
+/******************************************************************//**
+Performance schema instrumented wrapper for rw_lock_x_lock_func().
+NOTE! Please use the corresponding macro rw_lock_x_lock(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_x_lock_func(
+/*====================*/
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	ulint		pass,	/*!< in: pass value; != 0, if the lock will
+				be passed to another thread to unlock */
+	const char*	file_name,/*!< in: file name where lock requested */
+	ulint		line)	/*!< in: line where requested */
+{
+	if (lock->pfs_psi == NULL) {
+		/* Not instrumented: plain x-lock */
+		rw_lock_x_lock_func(lock, pass, file_name, line);
+		return;
+	}
+
+	PSI_rwlock_locker_state	state;
+	PSI_rwlock_locker*	locker;
+
+	/* Record the entry of rw x lock request in performance schema */
+	locker = PSI_RWLOCK_CALL(start_rwlock_wrwait)(
+		&state, lock->pfs_psi, PSI_RWLOCK_WRITELOCK,
+		file_name, static_cast<uint>(line));
+
+	/* Forward line unchanged, exactly as the uninstrumented path
+	above does, instead of round-tripping it through uint. */
+	rw_lock_x_lock_func(lock, pass, file_name, line);
+
+	if (locker != NULL) {
+		PSI_RWLOCK_CALL(end_rwlock_wrwait)(locker, 0);
+	}
+}
+/******************************************************************//**
+Performance schema instrumented wrapper for rw_lock_x_lock_func_nowait().
+NOTE! Please use the corresponding macro, not directly this function!
+NOTE(review): the ibool result (TRUE on success) is handed to
+end_rwlock_wrwait as its result code - confirm that PSI expects this.
+@return	TRUE if success */
+UNIV_INLINE
+ibool
+pfs_rw_lock_x_lock_func_nowait(
+/*===========================*/
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	const char*	file_name,/*!< in: file name where lock
+				requested */
+	ulint		line)	/*!< in: line where requested */
+{
+	if (lock->pfs_psi == NULL) {
+		/* Not instrumented: plain attempt */
+		return(rw_lock_x_lock_func_nowait(lock, file_name, line));
+	}
+
+	PSI_rwlock_locker_state	state;
+	PSI_rwlock_locker*	locker;
+	ibool			ret;
+
+	/* Record the entry of rw x lock request in performance schema;
+	the line is narrowed to uint for the PSI call only */
+	locker = PSI_RWLOCK_CALL(start_rwlock_wrwait)(
+		&state, lock->pfs_psi, PSI_RWLOCK_WRITELOCK,
+		file_name, static_cast<uint>(line));
+
+	ret = rw_lock_x_lock_func_nowait(lock, file_name, line);
+
+	if (locker != NULL) {
+		/* Close the wait event opened above, passing on the
+		outcome of the attempt */
+		PSI_RWLOCK_CALL(end_rwlock_wrwait)(
+			locker, static_cast<int>(ret));
+	}
+
+	return(ret);
+}
+/******************************************************************//**
+Performance schema instrumented wrapper for rw_lock_free_func(). Destroys
+the PSI handle, if any, and clears lock->pfs_psi before freeing. NOTE!
+Please use the macro rw_lock_free(), not directly this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_free_func(
+/*==================*/
+	rw_lock_t*	lock)	/*!< in/out: pointer to rw-lock */
+{
+	if (lock->pfs_psi != NULL) {
+		/* Drop the instrumentation before the latch itself */
+		PSI_RWLOCK_CALL(destroy_rwlock)(lock->pfs_psi);
+		lock->pfs_psi = NULL;
+	}
+
+	rw_lock_free_func(lock);
+}
+/******************************************************************//**
+Performance schema instrumented wrapper for rw_lock_s_lock_func().
+NOTE! Please use the corresponding macro rw_lock_s_lock(), not
+directly this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_s_lock_func(
+/*====================*/
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	ulint		pass,	/*!< in: pass value; != 0, if the
+				lock will be passed to another
+				thread to unlock */
+	const char*	file_name,/*!< in: file name where lock
+				requested */
+	ulint		line)	/*!< in: line where requested */
+{
+	if (lock->pfs_psi == NULL) {
+		/* Not instrumented: plain s-lock */
+		rw_lock_s_lock_func(lock, pass, file_name, line);
+		return;
+	}
+
+	PSI_rwlock_locker_state	state;
+	PSI_rwlock_locker*	locker;
+
+	/* Instrumented to inform we are acquiring a shared rwlock */
+	locker = PSI_RWLOCK_CALL(start_rwlock_rdwait)(
+		&state, lock->pfs_psi, PSI_RWLOCK_READLOCK,
+		file_name, static_cast<uint>(line));
+
+	rw_lock_s_lock_func(lock, pass, file_name, line);
+
+	if (locker != NULL) {
+		/* Close the wait event opened above */
+		PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, 0);
+	}
+
+	return;
+}
+/******************************************************************//**
+Performance schema instrumented wrapper for rw_lock_s_lock_low().
+NOTE! Please use the corresponding macro, not directly this function!
+NOTE(review): TRUE on success is passed as the PSI result code - confirm.
+@return	TRUE if success */
+UNIV_INLINE
+ibool
+pfs_rw_lock_s_lock_low(
+/*===================*/
+	rw_lock_t*	lock,	/*!< in: pointer to rw-lock */
+	ulint		pass,	/*!< in: pass value; != 0, if the
+				lock will be passed to another
+				thread to unlock */
+	const char*	file_name, /*!< in: file name where lock requested */
+	ulint		line)	/*!< in: line where requested */
+{
+	if (lock->pfs_psi == NULL) {
+		/* Not instrumented: plain attempt */
+		return(rw_lock_s_lock_low(lock, pass, file_name, line));
+	}
+
+	PSI_rwlock_locker_state	state;
+	PSI_rwlock_locker*	locker;
+	ibool			ret;
+
+	/* Instrumented to inform we are acquiring a shared rwlock;
+	the line is narrowed to uint for the PSI call only */
+	locker = PSI_RWLOCK_CALL(start_rwlock_rdwait)(
+		&state, lock->pfs_psi, PSI_RWLOCK_READLOCK,
+		file_name, static_cast<uint>(line));
+
+	ret = rw_lock_s_lock_low(lock, pass, file_name, line);
+
+	if (locker != NULL) {
+		/* Close the wait event opened above, passing on the
+		outcome of the attempt */
+		PSI_RWLOCK_CALL(end_rwlock_rdwait)(
+			locker, static_cast<int>(ret));
+	}
+
+	return(ret);
+}
+
+/******************************************************************//**
+Performance schema instrumented wrapper for rw_lock_x_unlock_func().
+NOTE! Please use the corresponding macro rw_lock_x_unlock(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_x_unlock_func(
+/*======================*/
+#ifdef UNIV_SYNC_DEBUG
+	ulint		pass,	/*!< in: pass value; != 0, if the
+				lock may have been passed to another
+				thread to unlock */
+#endif
+	rw_lock_t*	lock)	/*!< in/out: rw-lock */
+{
+	if (lock->pfs_psi != NULL) {
+		/* Inform performance schema we are unlocking the lock */
+		PSI_RWLOCK_CALL(unlock_rwlock)(lock->pfs_psi);
+	}
+	rw_lock_x_unlock_func(
+#ifdef UNIV_SYNC_DEBUG
+		pass,
+#endif
+		lock);
+}
+
+/******************************************************************//**
+Performance schema instrumented wrapper for rw_lock_s_unlock_func().
+NOTE! Please use the corresponding macro rw_lock_s_unlock(), not
+directly this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_s_unlock_func(
+/*======================*/
+#ifdef UNIV_SYNC_DEBUG
+	ulint		pass,	/*!< in: pass value; != 0, if the
+				lock may have been passed to another
+				thread to unlock */
+#endif
+	rw_lock_t*	lock)	/*!< in/out: rw-lock */
+{
+	if (lock->pfs_psi != NULL) {
+		/* Inform performance schema we are unlocking the lock */
+		PSI_RWLOCK_CALL(unlock_rwlock)(lock->pfs_psi);
+	}
+
+	rw_lock_s_unlock_func(
+#ifdef UNIV_SYNC_DEBUG
+		pass,
+#endif
+		lock);
+}
+#endif /* UNIV_PFS_RWLOCK */
diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
new file mode 100644
index 00000000000..82fb353a41b
--- /dev/null
+++ b/storage/innobase/include/sync0sync.h
@@ -0,0 +1,845 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+Copyright (c) 2012, Facebook Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/sync0sync.h
+Mutex, the basic synchronization primitive
+
+Created 9/5/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef sync0sync_h
+#define sync0sync_h
+
+#include "univ.i"
+#include "sync0types.h"
+#include "ut0lst.h"
+#include "ut0mem.h"
+#include "os0thread.h"
+#include "os0sync.h"
+#include "sync0arr.h"
+
+#if  defined(UNIV_DEBUG) && !defined(UNIV_HOTBACKUP)
+extern "C" my_bool	timed_mutexes;
+#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
+
+#ifdef HAVE_WINDOWS_ATOMICS
+typedef LONG lock_word_t;	/*!< Type of ib_mutex_t::lock_word. On Windows,
+				InterlockedExchange operates on a LONG variable */
+#elif defined(HAVE_ATOMIC_BUILTINS) && !defined(HAVE_ATOMIC_BUILTINS_BYTE)
+typedef ulint lock_word_t;	/*!< atomic builtins, but not the _BYTE variant */
+#else
+typedef byte lock_word_t;	/*!< every other configuration */
+#endif
+
+#if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK
+
+/* By default, buffer mutexes and rwlocks will be excluded from
+instrumentation due to their large number of instances. */
+# define PFS_SKIP_BUFFER_MUTEX_RWLOCK
+
+/* By default, event->mutex will also be excluded from instrumentation */
+# define PFS_SKIP_EVENT_MUTEX
+
+#endif /* UNIV_PFS_MUTEX || UNIV_PFS_RWLOCK */
+
+#ifdef UNIV_PFS_MUTEX
+/* Key defines to register InnoDB mutexes with performance schema */
+extern mysql_pfs_key_t	autoinc_mutex_key;
+extern mysql_pfs_key_t	buffer_block_mutex_key;
+extern mysql_pfs_key_t	buf_pool_mutex_key;
+extern mysql_pfs_key_t	buf_pool_zip_mutex_key;
+extern mysql_pfs_key_t	cache_last_read_mutex_key;
+extern mysql_pfs_key_t	dict_foreign_err_mutex_key;
+extern mysql_pfs_key_t	dict_sys_mutex_key;
+extern mysql_pfs_key_t	file_format_max_mutex_key;
+extern mysql_pfs_key_t	fil_system_mutex_key;
+extern mysql_pfs_key_t	flush_list_mutex_key;
+extern mysql_pfs_key_t	fts_bg_threads_mutex_key;
+extern mysql_pfs_key_t	fts_delete_mutex_key;
+extern mysql_pfs_key_t	fts_optimize_mutex_key;
+extern mysql_pfs_key_t	fts_doc_id_mutex_key;
+extern mysql_pfs_key_t	fts_pll_tokenize_mutex_key;
+extern mysql_pfs_key_t	hash_table_mutex_key;
+extern mysql_pfs_key_t	ibuf_bitmap_mutex_key;
+extern mysql_pfs_key_t	ibuf_mutex_key;
+extern mysql_pfs_key_t	ibuf_pessimistic_insert_mutex_key;
+extern mysql_pfs_key_t	log_sys_mutex_key;
+extern mysql_pfs_key_t	log_flush_order_mutex_key;
+# ifndef HAVE_ATOMIC_BUILTINS
+extern mysql_pfs_key_t	server_mutex_key;
+# endif /* !HAVE_ATOMIC_BUILTINS */
+# ifdef UNIV_MEM_DEBUG
+extern mysql_pfs_key_t	mem_hash_mutex_key;
+# endif /* UNIV_MEM_DEBUG */
+extern mysql_pfs_key_t	mem_pool_mutex_key;
+extern mysql_pfs_key_t	mutex_list_mutex_key;
+extern mysql_pfs_key_t	purge_sys_bh_mutex_key;
+extern mysql_pfs_key_t	recv_sys_mutex_key;
+extern mysql_pfs_key_t	recv_writer_mutex_key;
+extern mysql_pfs_key_t	rseg_mutex_key;
+# ifdef UNIV_SYNC_DEBUG
+extern mysql_pfs_key_t	rw_lock_debug_mutex_key;
+# endif /* UNIV_SYNC_DEBUG */
+extern mysql_pfs_key_t	rw_lock_list_mutex_key;
+extern mysql_pfs_key_t	rw_lock_mutex_key;
+extern mysql_pfs_key_t	srv_dict_tmpfile_mutex_key;
+extern mysql_pfs_key_t	srv_innodb_monitor_mutex_key;
+extern mysql_pfs_key_t	srv_misc_tmpfile_mutex_key;
+extern mysql_pfs_key_t	srv_threads_mutex_key;
+extern mysql_pfs_key_t	srv_monitor_file_mutex_key;
+# ifdef UNIV_SYNC_DEBUG
+extern mysql_pfs_key_t	sync_thread_mutex_key;
+# endif /* UNIV_SYNC_DEBUG */
+extern mysql_pfs_key_t	buf_dblwr_mutex_key;
+extern mysql_pfs_key_t	trx_undo_mutex_key;
+extern mysql_pfs_key_t	trx_mutex_key;
+extern mysql_pfs_key_t	lock_sys_mutex_key;
+extern mysql_pfs_key_t	lock_sys_wait_mutex_key;
+extern mysql_pfs_key_t	trx_sys_mutex_key;
+extern mysql_pfs_key_t	srv_sys_mutex_key;
+extern mysql_pfs_key_t	srv_sys_tasks_mutex_key;
+#ifndef HAVE_ATOMIC_BUILTINS
+extern mysql_pfs_key_t	srv_conc_mutex_key;
+#endif /* !HAVE_ATOMIC_BUILTINS */
+#ifndef HAVE_ATOMIC_BUILTINS_64
+extern mysql_pfs_key_t	monitor_mutex_key;
+#endif /* !HAVE_ATOMIC_BUILTINS_64 */
+extern mysql_pfs_key_t	event_os_mutex_key;
+extern mysql_pfs_key_t	ut_list_mutex_key;
+extern mysql_pfs_key_t	os_mutex_key;
+extern mysql_pfs_key_t  zip_pad_mutex_key;
+#endif /* UNIV_PFS_MUTEX */
+
+/******************************************************************//**
+Initializes the synchronization data structures. */
+UNIV_INTERN
+void
+sync_init(void);
+/*===========*/
+/******************************************************************//**
+Frees the resources in synchronization data structures. */
+UNIV_INTERN
+void
+sync_close(void);
+/*===========*/
+
+#undef mutex_free			/* Fix for MacOS X */
+
+#ifdef UNIV_PFS_MUTEX
+/**********************************************************************
+Following mutex APIs would be performance schema instrumented
+if "UNIV_PFS_MUTEX" is defined:
+
+mutex_create
+mutex_enter
+mutex_exit
+mutex_enter_nowait
+mutex_free
+
+These mutex APIs will point to corresponding wrapper functions that contain
+the performance schema instrumentation if "UNIV_PFS_MUTEX" is defined.
+The instrumented wrapper functions have the prefix of "pfs_".
+
+NOTE! The following macro should be used in mutex operation, not the
+corresponding function. */
+
+/******************************************************************//**
+Initializes a mutex object in a specified, appropriately aligned memory
+location, in the reset state. K is the performance schema key. The name #M
+is passed only if UNIV_DEBUG, and level only if UNIV_SYNC_DEBUG, is defined.
+mutex_free is necessary only if the memory block containing it is freed. */
+# ifdef UNIV_DEBUG
+#  ifdef UNIV_SYNC_DEBUG
+#   define mutex_create(K, M, level)				\
+	pfs_mutex_create_func((K), (M), #M, (level), __FILE__, __LINE__)
+#  else /* UNIV_SYNC_DEBUG */
+#   define mutex_create(K, M, level)				\
+	pfs_mutex_create_func((K), (M), #M, __FILE__, __LINE__)
+#  endif/* UNIV_SYNC_DEBUG */
+# else /* UNIV_DEBUG */
+#  define mutex_create(K, M, level)				\
+	pfs_mutex_create_func((K), (M), __FILE__, __LINE__)
+# endif	/* UNIV_DEBUG */
+
+# define mutex_enter(M)						\
+	pfs_mutex_enter_func((M), __FILE__, __LINE__)
+
+# define mutex_enter_nowait(M)					\
+	pfs_mutex_enter_nowait_func((M), __FILE__, __LINE__)
+
+# define mutex_exit(M)	pfs_mutex_exit_func(M)
+
+# define mutex_free(M)	pfs_mutex_free_func(M)
+
+#else	/* UNIV_PFS_MUTEX */
+
+/* If "UNIV_PFS_MUTEX" is not defined, the mutex APIs point to the original
+non-instrumented functions; the performance schema key K is then unused. */
+# ifdef UNIV_DEBUG
+#  ifdef UNIV_SYNC_DEBUG
+#   define mutex_create(K, M, level)			\
+	mutex_create_func((M), #M, (level), __FILE__, __LINE__)
+#  else /* UNIV_SYNC_DEBUG */
+#   define mutex_create(K, M, level)				\
+	mutex_create_func((M), #M, __FILE__, __LINE__)
+#  endif /* UNIV_SYNC_DEBUG */
+# else /* UNIV_DEBUG */
+#  define mutex_create(K, M, level)				\
+	mutex_create_func((M), __FILE__, __LINE__)
+# endif	/* UNIV_DEBUG */
+
+# define mutex_enter(M)	mutex_enter_func((M), __FILE__, __LINE__)
+
+# define mutex_enter_nowait(M)	\
+	mutex_enter_nowait_func((M), __FILE__, __LINE__)
+
+# define mutex_exit(M)	mutex_exit_func(M)
+
+# define mutex_free(M)	mutex_free_func(M)
+
+#endif	/* UNIV_PFS_MUTEX */
+
+/******************************************************************//**
+NOTE! Use the corresponding macro mutex_create(), not directly this function!
+Initializes a mutex object in a specified memory location (which must be
+appropriately aligned) to the reset state. Explicit freeing with mutex_free
+is necessary only if the memory block containing the mutex is freed. */
+UNIV_INTERN
+void
+mutex_create_func(
+/*==============*/
+	ib_mutex_t*	mutex,		/*!< in: pointer to memory */
+#ifdef UNIV_DEBUG
+	const char*	cmutex_name,	/*!< in: mutex name */
+# ifdef UNIV_SYNC_DEBUG
+	ulint		level,		/*!< in: latching order level */
+# endif /* UNIV_SYNC_DEBUG */
+#endif /* UNIV_DEBUG */
+	const char*	cfile_name,	/*!< in: file name where created */
+	ulint		cline);		/*!< in: file line where created */
+
+/******************************************************************//**
+NOTE! Use the corresponding macro mutex_free(), not directly this function!
+Calling this function is obligatory only if the memory buffer containing
+the mutex is freed. Removes a mutex object from the mutex list. The mutex
+is checked to be in the reset state. */
+UNIV_INTERN
+void
+mutex_free_func(
+/*============*/
+	ib_mutex_t*	mutex);	/*!< in: mutex */
+/**************************************************************//**
+NOTE! The following macro should be used in mutex locking, not the
+corresponding function. */
+
+/* NOTE! Same as mutex_enter only if UNIV_PFS_MUTEX is not defined! */
+
+#define mutex_enter_fast(M)	mutex_enter_func((M), __FILE__, __LINE__)
+/******************************************************************//**
+NOTE! Use the corresponding macro in the header file, not this function
+directly. Locks a mutex for the current thread. If the mutex is reserved
+the function spins a preset time (controlled by SYNC_SPIN_ROUNDS) waiting
+for the mutex before suspending the thread. */
+UNIV_INLINE
+void
+mutex_enter_func(
+/*=============*/
+	ib_mutex_t*	mutex,		/*!< in: pointer to mutex */
+	const char*	file_name,	/*!< in: file name where locked */
+	ulint		line);		/*!< in: line where locked */
+/********************************************************************//**
+NOTE! Use the corresponding macro in the header file, not this function
+directly. Tries to lock the mutex for the current thread. If the lock is not
+acquired immediately, returns with return value 1.
+@return	0 if succeed, 1 if not */
+UNIV_INTERN
+ulint
+mutex_enter_nowait_func(
+/*====================*/
+	ib_mutex_t*	mutex,		/*!< in: pointer to mutex */
+	const char*	file_name,	/*!< in: file name where mutex
+					requested */
+	ulint		line);		/*!< in: line where requested */
+/******************************************************************//**
+NOTE! Use the corresponding macro mutex_exit(), not directly this function!
+Unlocks a mutex owned by the current thread. */
+UNIV_INLINE
+void
+mutex_exit_func(
+/*============*/
+	ib_mutex_t*	mutex);	/*!< in: pointer to mutex */
+
+
+#ifdef UNIV_PFS_MUTEX
+/******************************************************************//**
+NOTE! Please use the corresponding macro mutex_create(), not directly
+this function!
+A wrapper function for mutex_create_func() that also registers the mutex
+with the performance schema when creating it. Only declared if
+"UNIV_PFS_MUTEX" is defined. */
+UNIV_INLINE
+void
+pfs_mutex_create_func(
+/*==================*/
+	PSI_mutex_key	key,		/*!< in: Performance Schema key */
+	ib_mutex_t*	mutex,		/*!< in: pointer to memory */
+# ifdef UNIV_DEBUG
+	const char*	cmutex_name,	/*!< in: mutex name */
+#  ifdef UNIV_SYNC_DEBUG
+	ulint		level,		/*!< in: level */
+#  endif /* UNIV_SYNC_DEBUG */
+# endif /* UNIV_DEBUG */
+	const char*	cfile_name,	/*!< in: file name where created */
+	ulint		cline);		/*!< in: file line where created */
+/******************************************************************//**
+NOTE! Please use the corresponding macro mutex_enter(), not directly
+this function!
+This is a performance schema instrumented wrapper function for
+mutex_enter_func(). */
+UNIV_INLINE
+void
+pfs_mutex_enter_func(
+/*=================*/
+	ib_mutex_t*	mutex,		/*!< in: pointer to mutex */
+	const char*	file_name,	/*!< in: file name where locked */
+	ulint		line);		/*!< in: line where locked */
+/********************************************************************//**
+NOTE! Please use the corresponding macro mutex_enter_nowait(), not directly
+this function!
+This is a performance schema instrumented wrapper function for
+mutex_enter_nowait_func.
+@return	0 if succeed, 1 if not */
+UNIV_INLINE
+ulint
+pfs_mutex_enter_nowait_func(
+/*========================*/
+	ib_mutex_t*	mutex,		/*!< in: pointer to mutex */
+	const char*	file_name,	/*!< in: file name where mutex
+					requested */
+	ulint		line);		/*!< in: line where requested */
+/******************************************************************//**
+NOTE! Please use the corresponding macro mutex_exit(), not directly
+this function!
+A wrapper function for mutex_exit_func() with performance schema
+instrumentation. Unlocks a mutex owned by the current thread. */
+UNIV_INLINE
+void
+pfs_mutex_exit_func(
+/*================*/
+	ib_mutex_t*	mutex);	/*!< in: pointer to mutex */
+
+/******************************************************************//**
+NOTE! Please use the corresponding macro mutex_free(), not directly
+this function!
+Wrapper function for mutex_free_func(). Also destroys the performance
+schema probes when freeing the mutex */
+UNIV_INLINE
+void
+pfs_mutex_free_func(
+/*================*/
+	ib_mutex_t*	mutex);	/*!< in: mutex */
+
+#endif /* UNIV_PFS_MUTEX */
+
+#ifdef UNIV_SYNC_DEBUG
+/******************************************************************//**
+Returns TRUE if no mutex or rw-lock is currently locked.
+Works only in the debug version.
+@return	TRUE if no mutexes and rw-locks reserved */
+UNIV_INTERN
+ibool
+sync_all_freed(void);
+/*================*/
+#endif /* UNIV_SYNC_DEBUG */
+/*#####################################################################
+FUNCTION PROTOTYPES FOR DEBUGGING */
+/*******************************************************************//**
+Prints wait info of the sync system. */
+UNIV_INTERN
+void
+sync_print_wait_info(
+/*=================*/
+	FILE*	file);		/*!< in: file where to print */
+/*******************************************************************//**
+Prints info of the sync system. */
+UNIV_INTERN
+void
+sync_print(
+/*=======*/
+	FILE*	file);		/*!< in: file where to print */
+#ifdef UNIV_DEBUG
+/******************************************************************//**
+Checks that the mutex has been initialized.
+@return	TRUE */
+UNIV_INTERN
+ibool
+mutex_validate(
+/*===========*/
+	const ib_mutex_t*	mutex);	/*!< in: mutex */
+/******************************************************************//**
+Checks that the current thread owns the mutex. Works only
+in the debug version.
+@return	TRUE if owns */
+UNIV_INTERN
+ibool
+mutex_own(
+/*======*/
+	const ib_mutex_t*	mutex)	/*!< in: mutex */
+	__attribute__((warn_unused_result));
+#endif /* UNIV_DEBUG */
+#ifdef UNIV_SYNC_DEBUG
+/******************************************************************//**
+Adds a latch and its level in the thread level array. Allocates the memory
+for the array if called first time for this OS thread. Makes the checks
+against other latch levels stored in the array for this thread. */
+UNIV_INTERN
+void
+sync_thread_add_level(
+/*==================*/
+	void*	latch,	/*!< in: pointer to a mutex or an rw-lock */
+	ulint	level,	/*!< in: level in the latching order; if
+			SYNC_LEVEL_VARYING, nothing is done */
+	ibool	relock)	/*!< in: TRUE if re-entering an x-lock */
+	__attribute__((nonnull));
+/******************************************************************//**
+Removes a latch from the thread level array if it is found there.
+@return TRUE if found in the array; it is no error if the latch is
+not found, as we presently are not able to determine the level for
+every latch reservation the program does */
+UNIV_INTERN
+ibool
+sync_thread_reset_level(
+/*====================*/
+	void*	latch);	/*!< in: pointer to a mutex or an rw-lock */
+/******************************************************************//**
+Checks if the level array for the current thread contains a
+mutex or rw-latch at the specified level.
+@return	a matching latch, or NULL if not found */
+UNIV_INTERN
+void*
+sync_thread_levels_contains(
+/*========================*/
+	ulint	level);			/*!< in: latching order level
+					(SYNC_DICT, ...)*/
+/******************************************************************//**
+Checks that the level array for the current thread is empty.
+@return	a latch, or NULL if empty except the exceptions specified below */
+UNIV_INTERN
+void*
+sync_thread_levels_nonempty_gen(
+/*============================*/
+	ibool	dict_mutex_allowed)	/*!< in: TRUE if dictionary mutex is
+					allowed to be owned by the thread */
+	__attribute__((warn_unused_result));
+/******************************************************************//**
+Checks if the level array for the current thread is empty,
+except for data dictionary latches. */
+#define sync_thread_levels_empty_except_dict()		\
+	(!sync_thread_levels_nonempty_gen(TRUE))
+/******************************************************************//**
+Checks if the level array for the current thread is empty,
+except for the btr_search_latch.
+@return	a latch, or NULL if empty except the exceptions specified below */
+UNIV_INTERN
+void*
+sync_thread_levels_nonempty_trx(
+/*============================*/
+	ibool	has_search_latch)
+				/*!< in: TRUE if and only if the thread
+				is supposed to hold btr_search_latch */
+	__attribute__((warn_unused_result));
+
+/******************************************************************//**
+Gets the debug information for a reserved mutex. */
+UNIV_INTERN
+void
+mutex_get_debug_info(
+/*=================*/
+	ib_mutex_t*	mutex,		/*!< in: mutex */
+	const char**	file_name,	/*!< out: file where requested */
+	ulint*		line,		/*!< out: line where requested */
+	os_thread_id_t* thread_id);	/*!< out: id of the thread which owns
+					the mutex */
+/******************************************************************//**
+Counts currently reserved mutexes. Works only in the debug version.
+@return	number of reserved mutexes */
+UNIV_INTERN
+ulint
+mutex_n_reserved(void);
+/*==================*/
+#endif /* UNIV_SYNC_DEBUG */
+/******************************************************************//**
+NOT to be used outside this module except in debugging!
+@return	the value of the lock word */
+UNIV_INLINE
+lock_word_t
+mutex_get_lock_word(
+/*================*/
+	const ib_mutex_t*	mutex);	/*!< in: mutex */
+#ifdef UNIV_SYNC_DEBUG
+/******************************************************************//**
+NOT to be used outside this module except in debugging! Gets the waiters
+field in a mutex.
+@return	value of the waiters field */
+UNIV_INLINE
+ulint
+mutex_get_waiters(
+/*==============*/
+	const ib_mutex_t*	mutex);	/*!< in: mutex */
+#endif /* UNIV_SYNC_DEBUG */
+
+/*
+		LATCHING ORDER WITHIN THE DATABASE
+		==================================
+
+The mutex or latch in the central memory object, for instance, a rollback
+segment object, must be acquired before acquiring the latch or latches to
+the corresponding file data structure. In the latching order below, these
+file page object latches are placed immediately below the corresponding
+central memory object latch or mutex.
+
+Synchronization object			Notes
+----------------------			-----
+
+Dictionary mutex			If we have a pointer to a dictionary
+|					object, e.g., a table, it can be
+|					accessed without reserving the
+|					dictionary mutex. We must have a
+|					reservation, a memoryfix, to the
+|					appropriate table object in this case,
+|					and the table must be explicitly
+|					released later.
+V
+Dictionary header
+|
+V
+Secondary index tree latch		The tree latch protects also all
+|					the B-tree non-leaf pages. These
+V					can be read with the page only
+Secondary index non-leaf		bufferfixed to save CPU time,
+|					no s-latch is needed on the page.
+|					Modification of a page requires an
+|					x-latch on the page, however. If a
+|					thread owns an x-latch to the tree,
+|					it is allowed to latch non-leaf pages
+|					even after it has acquired the fsp
+|					latch.
+V
+Secondary index leaf			The latch on the secondary index leaf
+|					can be kept while accessing the
+|					clustered index, to save CPU time.
+V
+Clustered index tree latch		To increase concurrency, the tree
+|					latch is usually released when the
+|					leaf page latch has been acquired.
+V
+Clustered index non-leaf
+|
+V
+Clustered index leaf
+|
+V
+Transaction system header
+|
+V
+Transaction undo mutex			The undo log entry must be written
+|					before any index page is modified.
+|					Transaction undo mutex is for the undo
+|					logs the analogue of the tree latch
+|					for a B-tree. If a thread has the
+|					trx undo mutex reserved, it is allowed
+|					to latch the undo log pages in any
+|					order, and also after it has acquired
+|					the fsp latch.
+V
+Rollback segment mutex			The rollback segment mutex must be
+|					reserved, if, e.g., a new page must
+|					be added to an undo log. The rollback
+|					segment and the undo logs in its
+|					history list can be seen as an
+|					analogue of a B-tree, and the latches
+|					reserved similarly, using a version of
+|					lock-coupling. If an undo log must be
+|					extended by a page when inserting an
+|					undo log record, this corresponds to
+|					a pessimistic insert in a B-tree.
+V
+Rollback segment header
+|
+V
+Purge system latch
+|
+V
+Undo log pages				If a thread owns the trx undo mutex,
+|					or for a log in the history list, the
+|					rseg mutex, it is allowed to latch
+|					undo log pages in any order, and even
+|					after it has acquired the fsp latch.
+|					If a thread does not have the
+|					appropriate mutex, it is allowed to
+|					latch only a single undo log page in
+|					a mini-transaction.
+V
+File space management latch		If a mini-transaction must allocate
+|					several file pages, it can do that,
+|					because it keeps the x-latch to the
+|					file space management in its memo.
+V
+File system pages
+|
+V
+lock_sys_wait_mutex			Mutex protecting lock timeout data
+|
+V
+lock_sys_mutex				Mutex protecting lock_sys_t
+|
+V
+trx_sys->mutex				Mutex protecting trx_sys_t
+|
+V
+Threads mutex				Background thread scheduling mutex
+|
+V
+query_thr_mutex				Mutex protecting query threads
+|
+V
+trx_mutex				Mutex protecting trx_t fields
+|
+V
+Search system mutex
+|
+V
+Buffer pool mutex
+|
+V
+Log mutex
+|
+Any other latch
+|
+V
+Memory pool mutex */
+
+/* Latching order levels. If you modify these, you have to also update
+sync_thread_add_level(). */
+
+/* User transaction locks are higher than any of the latch levels below:
+no latches are allowed when a thread goes to wait for a normal table
+or row lock! */
+#define SYNC_USER_TRX_LOCK	9999
+#define SYNC_NO_ORDER_CHECK	3000	/* this can be used to suppress
+					latching order checking */
+#define	SYNC_LEVEL_VARYING	2000	/* Level is varying. Only used with
+					buffer pool page locks, which do not
+					have a fixed level, but instead have
+					their level set after the page is
+					locked; see e.g.
+					ibuf_bitmap_get_map_page(). */
+#define SYNC_TRX_I_S_RWLOCK	1910	/* Used for
+					trx_i_s_cache_t::rw_lock */
+#define SYNC_TRX_I_S_LAST_READ	1900	/* Used for
+					trx_i_s_cache_t::last_read_mutex */
+#define SYNC_FILE_FORMAT_TAG	1200	/* Used to serialize access to the
+					file format tag */
+#define	SYNC_DICT_OPERATION	1010	/* table create, drop, etc. reserve
+					this in X-mode; implicit or background
+					operations purge, rollback, foreign
+					key checks reserve this in S-mode */
+#define SYNC_FTS_CACHE		1005	/* FTS cache rwlock */
+#define SYNC_DICT		1000
+#define SYNC_DICT_AUTOINC_MUTEX	999
+#define SYNC_STATS_AUTO_RECALC	997
+#define SYNC_DICT_HEADER	995
+#define SYNC_IBUF_HEADER	914
+#define SYNC_IBUF_PESS_INSERT_MUTEX 912
+/*-------------------------------*/
+#define	SYNC_INDEX_TREE		900
+#define SYNC_TREE_NODE_NEW	892
+#define SYNC_TREE_NODE_FROM_HASH 891
+#define SYNC_TREE_NODE		890
+#define	SYNC_PURGE_LATCH	800
+#define	SYNC_TRX_UNDO		700
+#define SYNC_RSEG		600
+#define SYNC_RSEG_HEADER_NEW	591
+#define SYNC_RSEG_HEADER	590
+#define SYNC_TRX_UNDO_PAGE	570
+#define SYNC_EXTERN_STORAGE	500
+#define	SYNC_FSP		400
+#define	SYNC_FSP_PAGE		395
+/*------------------------------------- Change buffer headers */
+#define SYNC_IBUF_MUTEX		370	/* ibuf_mutex */
+/*------------------------------------- Change buffer tree */
+#define SYNC_IBUF_INDEX_TREE	360
+#define SYNC_IBUF_TREE_NODE_NEW	359
+#define SYNC_IBUF_TREE_NODE	358
+#define	SYNC_IBUF_BITMAP_MUTEX	351
+#define	SYNC_IBUF_BITMAP	350
+/*------------------------------------- Change log for online create index */
+#define SYNC_INDEX_ONLINE_LOG	340
+/*------------------------------------- MySQL query cache mutex */
+/*------------------------------------- MySQL binlog mutex */
+/*-------------------------------*/
+#define SYNC_LOCK_WAIT_SYS	300
+#define SYNC_LOCK_SYS		299
+#define SYNC_TRX_SYS		298
+#define SYNC_TRX		297
+#define SYNC_THREADS		295
+#define SYNC_REC_LOCK		294
+#define SYNC_TRX_SYS_HEADER	290
+#define	SYNC_PURGE_QUEUE	200
+#define SYNC_LOG		170
+#define SYNC_LOG_FLUSH_ORDER	147	/* NOTE: listed out of numeric order */
+#define SYNC_RECV		168
+#define SYNC_FTS_TOKENIZE	167
+#define SYNC_FTS_CACHE_INIT	166	/* Used for FTS cache initialization */
+#define SYNC_FTS_BG_THREADS	165
+#define SYNC_FTS_OPTIMIZE       164     // FIXME: is this correct number, test
+#define	SYNC_WORK_QUEUE		162
+#define	SYNC_SEARCH_SYS		160	/* NOTE that if we have a memory
+					heap that can be extended to the
+					buffer pool, its logical level is
+					SYNC_SEARCH_SYS, as memory allocation
+					can call routines there! Otherwise
+					the level is SYNC_MEM_HASH. */
+#define	SYNC_BUF_POOL		150	/* Buffer pool mutex */
+#define	SYNC_BUF_PAGE_HASH	149	/* buf_pool->page_hash rw_lock */
+#define	SYNC_BUF_BLOCK		146	/* Block mutex */
+#define	SYNC_BUF_FLUSH_LIST	145	/* Buffer flush list mutex */
+#define SYNC_DOUBLEWRITE	140
+#define	SYNC_ANY_LATCH		135
+#define	SYNC_MEM_HASH		131
+#define	SYNC_MEM_POOL		130
+
+/* Codes used to designate lock operations */
+#define RW_LOCK_NOT_LOCKED	350
+#define RW_LOCK_EX		351
+#define RW_LOCK_EXCLUSIVE	351	/* same value as RW_LOCK_EX */
+#define RW_LOCK_SHARED		352
+#define RW_LOCK_WAIT_EX		353
+#define SYNC_MUTEX		354
+
+/* NOTE! The structure appears here only for the compiler to know its size.
+Do not use its fields directly! This is the structure used in the spin lock
+implementation of a mutual exclusion semaphore. */
+
+/** InnoDB mutex */
+struct ib_mutex_t {
+	os_event_t	event;	/*!< Used by sync0arr.cc for the wait queue */
+	volatile lock_word_t	lock_word;	/*!< lock_word is the target
+				of the atomic test-and-set instruction when
+				atomic operations are enabled. */
+
+#if !defined(HAVE_ATOMIC_BUILTINS)
+	os_fast_mutex_t
+		os_fast_mutex;	/*!< We use this OS mutex in place of lock_word
+				when atomic operations are not enabled */
+#endif /* !HAVE_ATOMIC_BUILTINS */
+	ulint	waiters;	/*!< This ulint is set to 1 if there are (or
+				may be) threads waiting in the global wait
+				array for this mutex to be released.
+				Otherwise, this is 0. */
+	UT_LIST_NODE_T(ib_mutex_t)	list; /*!< All allocated mutexes are put into
+				a list; pointers to the next and previous. */
+#ifdef UNIV_SYNC_DEBUG
+	const char*	file_name;	/*!< File where the mutex was locked */
+	ulint	line;		/*!< Line where the mutex was locked */
+	ulint	level;		/*!< Level in the global latching order */
+#endif /* UNIV_SYNC_DEBUG */
+	const char*	cfile_name;/*!< File name where mutex created */
+	ulint		cline;	/*!< Line where created */
+	ulong		count_os_wait;	/*!< count of os_wait */
+#ifdef UNIV_DEBUG
+
+/** Value of ib_mutex_t::magic_n */
+# define MUTEX_MAGIC_N	979585UL
+
+	os_thread_id_t thread_id; /*!< The thread id of the thread
+				which locked the mutex. */
+	ulint		magic_n;	/*!< MUTEX_MAGIC_N */
+	const char*	cmutex_name;	/*!< mutex name */
+	ulint		ib_mutex_type;	/*!< 0=usual mutex, 1=rw_lock mutex */
+#endif /* UNIV_DEBUG */
+#ifdef UNIV_PFS_MUTEX
+	struct PSI_mutex* pfs_psi;	/*!< The performance schema
+					instrumentation hook */
+#endif /* UNIV_PFS_MUTEX */
+};
+
+/** Constant determining how long spin wait is continued before suspending
+the thread. A value 600 rounds on a 1995 100 MHz Pentium seems to correspond
+to 20 microseconds. */
+
+#define	SYNC_SPIN_ROUNDS	srv_n_spin_wait_rounds
+
+/** The number of mutex_exit calls. Intended for performance monitoring. */
+extern	ib_int64_t	mutex_exit_count;
+
+#ifdef UNIV_SYNC_DEBUG
+/** Latching order checks start when this is set TRUE */
+extern ibool	sync_order_checks_on;
+#endif /* UNIV_SYNC_DEBUG */
+
+/** This variable is set to TRUE when sync_init is called */
+extern ibool	sync_initialized;
+
+/** Type of the base node of a list of ib_mutex_t objects. */
+typedef UT_LIST_BASE_NODE_T(ib_mutex_t)  ut_list_base_node_t;
+/** Global list of database mutexes (not OS mutexes) created. */
+extern ut_list_base_node_t  mutex_list;
+
+/** Mutex protecting the mutex_list variable */
+extern ib_mutex_t mutex_list_mutex;
+
+#ifndef HAVE_ATOMIC_BUILTINS
+/**********************************************************//**
+Function that uses a mutex to decrement a variable atomically */
+UNIV_INLINE
+void
+os_atomic_dec_ulint_func(
+/*=====================*/
+	ib_mutex_t*		mutex,		/*!< in: mutex guarding the
+						decrement */
+	volatile ulint*		var,		/*!< in/out: variable to
+						decrement */
+	ulint			delta);		/*!< in: delta to decrement */
+/**********************************************************//**
+Function that uses a mutex to increment a variable atomically */
+UNIV_INLINE
+void
+os_atomic_inc_ulint_func(
+/*=====================*/
+	ib_mutex_t*		mutex,		/*!< in: mutex guarding the
+						increment */
+	volatile ulint*		var,		/*!< in/out: variable to
+						increment */
+	ulint			delta);		/*!< in: delta to increment */
+#endif /* !HAVE_ATOMIC_BUILTINS */
+
+#ifndef UNIV_NONINL
+#include "sync0sync.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/sync0sync.ic b/storage/innobase/include/sync0sync.ic
new file mode 100644
index 00000000000..616e53d4aac
--- /dev/null
+++ b/storage/innobase/include/sync0sync.ic
@@ -0,0 +1,414 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/sync0sync.ic
+Mutex, the basic synchronization primitive
+
+Created 9/5/1995 Heikki Tuuri
+*******************************************************/
+
+/******************************************************************//**
+Sets the waiters field in a mutex. */
+UNIV_INTERN
+void
+mutex_set_waiters(
+/*==============*/
+	ib_mutex_t*	mutex,	/*!< in: mutex */
+	ulint		n);	/*!< in: value to set */
+/******************************************************************//**
+Reserves a mutex for the current thread. If the mutex is reserved, the
+function spins a preset time (controlled by SYNC_SPIN_ROUNDS) waiting
+for the mutex before suspending the thread. */
+UNIV_INTERN
+void
+mutex_spin_wait(
+/*============*/
+	ib_mutex_t*	mutex,		/*!< in: pointer to mutex */
+	const char*	file_name,	/*!< in: file name where mutex
+					requested */
+	ulint		line);		/*!< in: line where requested */
+#ifdef UNIV_SYNC_DEBUG
+/******************************************************************//**
+Sets the debug information for a reserved mutex. */
+UNIV_INTERN
+void
+mutex_set_debug_info(
+/*=================*/
+	ib_mutex_t*	mutex,		/*!< in: mutex */
+	const char*	file_name,	/*!< in: file where requested */
+	ulint		line);		/*!< in: line where requested */
+#endif /* UNIV_SYNC_DEBUG */
+/******************************************************************//**
+Releases the threads waiting in the primary wait array for this mutex. */
+UNIV_INTERN
+void
+mutex_signal_object(
+/*================*/
+	ib_mutex_t*	mutex);	/*!< in: mutex */
+
+/******************************************************************//**
+Atomically sets the lock_word field of a mutex to 1. Without atomic
+builtins this falls back to a trylock on the os_fast_mutex of the mutex.
+@return	the previous value of lock_word: 0 or 1 */
+UNIV_INLINE
+byte
+ib_mutex_test_and_set(
+/*===============*/
+	ib_mutex_t*	mutex)	/*!< in: mutex */
+{
+#if defined(HAVE_ATOMIC_BUILTINS)
+# if defined(HAVE_ATOMIC_BUILTINS_BYTE)
+	return(os_atomic_test_and_set_byte(&mutex->lock_word, 1));
+# else
+	return(os_atomic_test_and_set_ulint(&mutex->lock_word, 1));
+# endif
+#else
+	ibool	ret;
+
+	ret = os_fast_mutex_trylock(&(mutex->os_fast_mutex));
+
+	if (ret == 0) {
+		/* The trylock succeeded, so nobody else may hold the mutex:
+		assert that the lock word is still free before claiming it */
+		ut_a(mutex->lock_word == 0);
+
+		mutex->lock_word = 1;
+		os_wmb;
+	}
+
+	return((byte) ret);
+#endif
+}
+
+/******************************************************************//**
+Resets the lock_word field of a mutex to 0, releasing the mutex. This
+instruction also serializes memory operations to the program order. */
+UNIV_INLINE
+void
+mutex_reset_lock_word(
+/*==================*/
+	ib_mutex_t*	mutex)	/*!< in: mutex */
+{
+#if defined(HAVE_ATOMIC_BUILTINS)
+	/* In theory __sync_lock_release should be used to release the lock.
+	Unfortunately, it does not work properly alone. The workaround is
+	that the more conservative __sync_lock_test_and_set is used instead. */
+# if defined(HAVE_ATOMIC_BUILTINS_BYTE)
+	os_atomic_test_and_set_byte(&mutex->lock_word, 0);
+# else
+	os_atomic_test_and_set_ulint(&mutex->lock_word, 0);
+# endif
+#else
+	mutex->lock_word = 0;
+
+	os_fast_mutex_unlock(&(mutex->os_fast_mutex));
+#endif
+}
+
+/******************************************************************//**
+Gets the value of the lock word; the mutex itself is not modified. */
+UNIV_INLINE
+lock_word_t
+mutex_get_lock_word(
+/*================*/
+	const ib_mutex_t*	mutex)	/*!< in: mutex */
+{
+	ut_ad(mutex);
+
+	return(mutex->lock_word);
+}
+
+/******************************************************************//**
+Gets the waiters field in a mutex.
+@return	value of the waiters field */
+UNIV_INLINE
+ulint
+mutex_get_waiters(
+/*==============*/
+	const ib_mutex_t*	mutex)	/*!< in: mutex */
+{
+	const volatile ulint*	ptr;	/*!< declared volatile to ensure that
+					the value is read from memory */
+	ut_ad(mutex);
+
+	ptr = &(mutex->waiters);
+
+	return(*ptr);		/* Here we assume that the read of a single
+				word from memory is atomic */
+}
+
+/******************************************************************//**
+NOTE! Use the corresponding macro mutex_exit(), not directly this function!
+Unlocks a mutex owned by the current thread and signals any waiters. */
+UNIV_INLINE
+void
+mutex_exit_func(
+/*============*/
+	ib_mutex_t*	mutex)	/*!< in: pointer to mutex */
+{
+	ut_ad(mutex_own(mutex));
+
+	ut_d(mutex->thread_id = (os_thread_id_t) ULINT_UNDEFINED);
+
+#ifdef UNIV_SYNC_DEBUG
+	sync_thread_reset_level(mutex);
+#endif
+	mutex_reset_lock_word(mutex);
+
+	/* A problem: we assume that mutex_reset_lock_word()
+	is a memory barrier, that is when we read the waiters
+	field next, the read must be serialized in memory
+	after the reset. A speculative processor might
+	perform the read first, which could leave a waiting
+	thread hanging indefinitely.
+
+	Our current solution is to call
+	sync_arr_wake_threads_if_sema_free() every second
+	to wake up possibly hanging threads if
+	they are missed in mutex_signal_object(). */
+
+	if (mutex_get_waiters(mutex) != 0) {
+
+		mutex_signal_object(mutex);
+	}
+
+#ifdef UNIV_SYNC_PERF_STAT
+	mutex_exit_count++;
+#endif
+}
+
+/******************************************************************//**
+Locks a mutex for the current thread. One atomic test-and-set is tried first;
+if the mutex is reserved, mutex_spin_wait() spins a preset time (controlled
+by SYNC_SPIN_ROUNDS), waiting for the mutex before suspending the thread. */
+UNIV_INLINE
+void
+mutex_enter_func(
+/*=============*/
+	ib_mutex_t*	mutex,		/*!< in: pointer to mutex */
+	const char*	file_name,	/*!< in: file name where locked */
+	ulint		line)		/*!< in: line where locked */
+{
+	ut_ad(mutex_validate(mutex));
+	ut_ad(!mutex_own(mutex));
+
+	/* Note that we do not peek at the value of lock_word before trying
+	the atomic test_and_set; we could peek, and possibly save time. */
+
+	if (!ib_mutex_test_and_set(mutex)) {
+		ut_d(mutex->thread_id = os_thread_get_curr_id());
+#ifdef UNIV_SYNC_DEBUG
+		mutex_set_debug_info(mutex, file_name, line);
+#endif
+		return;	/* Succeeded on the first attempt! */
+	}
+
+	mutex_spin_wait(mutex, file_name, line);
+}
+
+#ifdef UNIV_PFS_MUTEX
+/******************************************************************//**
+NOTE! Please use the corresponding macro mutex_enter(), not directly
+this function!
+Performance schema instrumented wrapper for mutex_enter_func(); the wait
+is reported only when the mutex has a PSI handle (mutex->pfs_psi != NULL). */
+UNIV_INLINE
+void
+pfs_mutex_enter_func(
+/*=================*/
+	ib_mutex_t*	mutex,	/*!< in: pointer to mutex */
+	const char*	file_name,	/*!< in: file name where locked */
+	ulint		line)		/*!< in: line where locked */
+{
+	if (mutex->pfs_psi != NULL) {
+		PSI_mutex_locker*	locker;
+		PSI_mutex_locker_state	state;
+
+		locker = PSI_MUTEX_CALL(start_mutex_wait)(
+			&state, mutex->pfs_psi,
+			PSI_MUTEX_LOCK, file_name,
+			static_cast<uint>(line));
+
+		mutex_enter_func(mutex, file_name, line);
+
+		if (locker != NULL) {
+			PSI_MUTEX_CALL(end_mutex_wait)(locker, 0);
+		}
+	} else {
+		mutex_enter_func(mutex, file_name, line);
+	}
+}
+
+/********************************************************************//**
+NOTE! Please use the corresponding macro mutex_enter_nowait(), not directly
+this function!
+This is a performance schema instrumented wrapper function for
+mutex_enter_nowait_func(); its result is also passed to end_mutex_wait.
+@return 0 if succeeded, 1 if not */
+UNIV_INLINE
+ulint
+pfs_mutex_enter_nowait_func(
+/*========================*/
+	ib_mutex_t*	mutex,		/*!< in: pointer to mutex */
+	const char*	file_name,	/*!< in: file name where mutex
+					requested */
+	ulint		line)		/*!< in: line where requested */
+{
+	ulint		ret;
+
+	if (mutex->pfs_psi != NULL) {
+		PSI_mutex_locker*	locker;
+		PSI_mutex_locker_state		state;
+
+		locker = PSI_MUTEX_CALL(start_mutex_wait)(
+			&state, mutex->pfs_psi,
+			PSI_MUTEX_TRYLOCK, file_name,
+			static_cast<uint>(line));
+
+		ret = mutex_enter_nowait_func(mutex, file_name, line);
+
+		if (locker != NULL) {
+			PSI_MUTEX_CALL(end_mutex_wait)(locker, (int) ret);
+		}
+	} else {
+		ret = mutex_enter_nowait_func(mutex, file_name, line);
+	}
+
+	return(ret);
+}
+
+/******************************************************************//**
+NOTE! Please use the corresponding macro mutex_exit(), not directly
+this function!
+A wrapper function for mutex_exit_func() with performance schema
+instrumentation. Unlocks a mutex owned by the current thread. */
+UNIV_INLINE
+void
+pfs_mutex_exit_func(
+/*================*/
+	ib_mutex_t*	mutex)	/*!< in: pointer to mutex */
+{
+	if (mutex->pfs_psi != NULL) {
+		PSI_MUTEX_CALL(unlock_mutex)(mutex->pfs_psi);
+	}
+
+	mutex_exit_func(mutex);
+}
+
+/******************************************************************//**
+NOTE! Please use the corresponding macro mutex_create(), not directly
+this function!
+A wrapper function for mutex_create_func() that first registers the
+mutex with performance schema under the given key. Only compiled if
+"UNIV_PFS_MUTEX" is defined */
+UNIV_INLINE
+void
+pfs_mutex_create_func(
+/*==================*/
+	mysql_pfs_key_t	key,		/*!< in: Performance Schema key */
+	ib_mutex_t*	mutex,		/*!< in: pointer to memory */
+# ifdef UNIV_DEBUG
+	const char*	cmutex_name,	/*!< in: mutex name */
+#  ifdef UNIV_SYNC_DEBUG
+	ulint		level,		/*!< in: level */
+#  endif /* UNIV_SYNC_DEBUG */
+# endif /* UNIV_DEBUG */
+	const char*	cfile_name,	/*!< in: file name where created */
+	ulint		cline)		/*!< in: file line where created */
+{
+	mutex->pfs_psi = PSI_MUTEX_CALL(init_mutex)(key, mutex);
+
+	mutex_create_func(mutex,
+# ifdef UNIV_DEBUG
+			  cmutex_name,
+#  ifdef UNIV_SYNC_DEBUG
+			  level,
+#  endif /* UNIV_SYNC_DEBUG */
+# endif /* UNIV_DEBUG */
+			  cfile_name,
+			  cline);
+}
+
+/******************************************************************//**
+NOTE! Please use the corresponding macro mutex_free(), not directly
+this function!
+Wrapper function for mutex_free_func(). Destroys the performance schema
+handle, if any, and clears mutex->pfs_psi before freeing the mutex */
+UNIV_INLINE
+void
+pfs_mutex_free_func(
+/*================*/
+	ib_mutex_t*	mutex)	/*!< in: mutex */
+{
+	if (mutex->pfs_psi != NULL) {
+		PSI_MUTEX_CALL(destroy_mutex)(mutex->pfs_psi);
+		mutex->pfs_psi = NULL;
+	}
+
+	mutex_free_func(mutex);
+}
+
+#endif /* UNIV_PFS_MUTEX */
+
+#ifndef HAVE_ATOMIC_BUILTINS
+/**********************************************************//**
+Decrements a variable by delta while holding the given mutex */
+UNIV_INLINE
+void
+os_atomic_dec_ulint_func(
+/*=====================*/
+	ib_mutex_t*	mutex,		/*!< in: mutex guarding the dec */
+	volatile ulint*	var,		/*!< in/out: variable to decrement */
+	ulint		delta)		/*!< in: delta to decrement */
+{
+	mutex_enter(mutex);
+
+	/* Debug assertion: the decrement must not take the
+	variable below zero (delta may not exceed *var). */
+	ut_ad(*var >= delta);
+
+	*var -= delta;
+
+	mutex_exit(mutex);
+}
+
+/**********************************************************//**
+Increments a variable by delta while holding the given mutex */
+UNIV_INLINE
+void
+os_atomic_inc_ulint_func(
+/*=====================*/
+	ib_mutex_t*	mutex,		/*!< in: mutex guarding the increment */
+	volatile ulint*	var,		/*!< in/out: variable to increment */
+	ulint		delta)		/*!< in: delta to increment */
+{
+	mutex_enter(mutex);
+
+	*var += delta;
+
+	mutex_exit(mutex);
+}
+#endif /* !HAVE_ATOMIC_BUILTINS */
diff --git a/storage/innobase/include/sync0types.h b/storage/innobase/include/sync0types.h
new file mode 100644
index 00000000000..0d143004a7a
--- /dev/null
+++ b/storage/innobase/include/sync0types.h
@@ -0,0 +1,31 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/sync0types.h
+Global types for sync
+
+Created 9/5/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef sync0types_h
+#define sync0types_h
+
+struct ib_mutex_t;
+
+#endif
diff --git a/storage/innobase/include/trx0i_s.h b/storage/innobase/include/trx0i_s.h
new file mode 100644
index 00000000000..662971a7841
--- /dev/null
+++ b/storage/innobase/include/trx0i_s.h
@@ -0,0 +1,311 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0i_s.h
+INFORMATION SCHEMA innodb_trx, innodb_locks and
+innodb_lock_waits tables cache structures and public
+functions.
+
+Created July 17, 2007 Vasil Dimov
+*******************************************************/
+
+#ifndef trx0i_s_h
+#define trx0i_s_h
+
+#include "univ.i"
+#include "trx0types.h"
+#include "dict0types.h"
+#include "ut0ut.h"
+
+/** The maximum amount of memory that can be consumed by innodb_trx,
+innodb_locks and innodb_lock_waits information schema tables. */
+#define TRX_I_S_MEM_LIMIT		16777216 /* 16 MiB */
+
+/** The maximum length of a string that can be stored in
+i_s_locks_row_t::lock_data */
+#define TRX_I_S_LOCK_DATA_MAX_LEN	8192
+
+/** The maximum length of a string that can be stored in
+i_s_trx_row_t::trx_query */
+#define TRX_I_S_TRX_QUERY_MAX_LEN	1024
+
+/** The maximum length of a string that can be stored in
+i_s_trx_row_t::trx_operation_state */
+#define TRX_I_S_TRX_OP_STATE_MAX_LEN	64
+
+/** The maximum length of a string that can be stored in
+i_s_trx_row_t::trx_foreign_key_error */
+#define TRX_I_S_TRX_FK_ERROR_MAX_LEN	256
+
+/** The maximum length of a string that can be stored in
+i_s_trx_row_t::trx_isolation_level */
+#define TRX_I_S_TRX_ISOLATION_LEVEL_MAX_LEN	16
+
+/** Copy a string into a string-based column of the INNODB_TRX table,
+truncating it to at most 'constraint' characters if it is longer */
+#define TRX_I_S_STRING_COPY(data, field, constraint, tcache)	\
+do {								\
+	if (strlen(data) > constraint) {			\
+		char	buff[constraint + 1];			\
+		strncpy(buff, data, constraint);		\
+		buff[constraint] = '\0';			\
+								\
+		field = static_cast<const char*>(		\
+			ha_storage_put_memlim(			\
+			(tcache)->storage, buff, constraint + 1,\
+			MAX_ALLOWED_FOR_STORAGE(tcache)));	\
+	} else {						\
+		field = static_cast<const char*>(		\
+			ha_storage_put_str_memlim(		\
+			(tcache)->storage, data,		\
+			MAX_ALLOWED_FOR_STORAGE(tcache)));	\
+	}							\
+} while (0)
+
+/** A row of INFORMATION_SCHEMA.innodb_locks */
+struct i_s_locks_row_t;
+
+/** Objects of trx_i_s_cache_t::locks_hash */
+struct i_s_hash_chain_t;
+
+/** Chain node of the hash table trx_i_s_cache_t::locks_hash; each node
+points to one innodb_locks row and to the next node in the same chain */
+struct i_s_hash_chain_t {
+	i_s_locks_row_t*	value;	/*!< row of
+					INFORMATION_SCHEMA.innodb_locks*/
+	i_s_hash_chain_t*	next;	/*!< next item in the hash chain */
+};
+
+/** This structure represents one row of INFORMATION_SCHEMA.innodb_locks */
+struct i_s_locks_row_t {
+	trx_id_t	lock_trx_id;	/*!< transaction identifier */
+	const char*	lock_mode;	/*!< lock mode from
+					lock_get_mode_str() */
+	const char*	lock_type;	/*!< lock type from
+					lock_get_type_str() */
+	const char*	lock_table;	/*!< table name from
+					lock_get_table_name() */
+	const char*	lock_index;	/*!< index name from
+					lock_rec_get_index_name() */
+	/** Information for record locks.  All these are
+	ULINT_UNDEFINED for table locks. */
+	/* @{ */
+	ulint		lock_space;	/*!< tablespace identifier */
+	ulint		lock_page;	/*!< page number within the space */
+	ulint		lock_rec;	/*!< heap number of the record
+					on the page */
+	const char*	lock_data;	/*!< (some) content of the record */
+	/* @} */
+
+	/** The following are auxiliary and not included in the table */
+	/* @{ */
+	table_id_t	lock_table_id;
+					/*!< table identifier from
+					lock_get_table_id */
+	i_s_hash_chain_t hash_chain;	/*!< hash table chain node for
+					trx_i_s_cache_t::locks_hash */
+	/* @} */
+};
+
+/** This structure represents one row of INFORMATION_SCHEMA.innodb_trx */
+struct i_s_trx_row_t {
+	trx_id_t		trx_id;		/*!< transaction identifier */
+	const char*		trx_state;	/*!< transaction state from
+						trx_get_que_state_str() */
+	ib_time_t		trx_started;	/*!< trx_t::start_time */
+	const i_s_locks_row_t*	requested_lock_row;
+					/*!< pointer to a row
+					in innodb_locks if trx
+					is waiting, or NULL */
+	ib_time_t	trx_wait_started; /*!< trx_t::wait_started */
+	ullint		trx_weight;	/*!< TRX_WEIGHT() */
+	ulint		trx_mysql_thread_id; /*!< thd_get_thread_id() */
+	const char*	trx_query;	/*!< MySQL statement being
+					executed in the transaction */
+	struct charset_info_st*	trx_query_cs;
+					/*!< charset used to encode the
+					MySQL statement */
+	const char*	trx_operation_state; /*!< trx_t::op_info */
+	ulint		trx_tables_in_use;/*!< n_mysql_tables_in_use in
+					 trx_t */
+	ulint		trx_tables_locked;
+					/*!< mysql_n_tables_locked in
+					trx_t */
+	ulint		trx_lock_structs;/*!< list len of trx_locks in
+					trx_t */
+	ulint		trx_lock_memory_bytes;
+					/*!< mem_heap_get_size(
+					trx->lock_heap) */
+	ulint		trx_rows_locked;/*!< lock_number_of_rows_locked() */
+	ullint		trx_rows_modified;/*!< trx_t::undo_no */
+	ulint		trx_concurrency_tickets;
+					/*!< n_tickets_to_enter_innodb in
+					trx_t */
+	const char*	trx_isolation_level;
+					/*!< isolation_level in trx_t */
+	ibool		trx_unique_checks;
+					/*!< check_unique_secondary in trx_t*/
+	ibool		trx_foreign_key_checks;
+					/*!< check_foreigns in trx_t */
+	const char*	trx_foreign_key_error;
+					/*!< detailed_error in trx_t */
+	ibool		trx_has_search_latch;
+					/*!< has_search_latch in trx_t */
+	ulint		trx_search_latch_timeout;
+					/*!< search_latch_timeout in trx_t */
+	ulint		trx_is_read_only;
+					/*!< trx_t::read_only */
+	ulint		trx_is_autocommit_non_locking;
+					/*!< trx_is_autocommit_non_locking(trx)
+					*/
+};
+
+/** One row of INFORMATION_SCHEMA.innodb_lock_waits: a requested/blocking pair */
+struct i_s_lock_waits_row_t {
+	const i_s_locks_row_t*	requested_lock_row;	/*!< requested lock */
+	const i_s_locks_row_t*	blocking_lock_row;	/*!< blocking lock */
+};
+
+/** Cache of INFORMATION_SCHEMA table data */
+struct trx_i_s_cache_t;
+
+/** Auxiliary enum used by functions that need to select one of the
+INFORMATION_SCHEMA tables held in the cache (trx_i_s_cache_t) */
+enum i_s_table {
+	I_S_INNODB_TRX,		/*!< INFORMATION_SCHEMA.innodb_trx */
+	I_S_INNODB_LOCKS,	/*!< INFORMATION_SCHEMA.innodb_locks */
+	I_S_INNODB_LOCK_WAITS	/*!< INFORMATION_SCHEMA.innodb_lock_waits */
+};
+
+/** This is the intermediate buffer where data needed to fill the
+INFORMATION SCHEMA tables is fetched and later retrieved by the C++
+code in handler/i_s.cc. */
+extern trx_i_s_cache_t*	trx_i_s_cache;
+
+/*******************************************************************//**
+Initialize INFORMATION SCHEMA trx related cache. */
+UNIV_INTERN
+void
+trx_i_s_cache_init(
+/*===============*/
+	trx_i_s_cache_t*	cache);	/*!< out: cache to init */
+/*******************************************************************//**
+Free the INFORMATION SCHEMA trx related cache. */
+UNIV_INTERN
+void
+trx_i_s_cache_free(
+/*===============*/
+	trx_i_s_cache_t*	cache);	/*!< in/out: cache to free */
+
+/*******************************************************************//**
+Issue a shared/read lock on the tables cache. */
+UNIV_INTERN
+void
+trx_i_s_cache_start_read(
+/*=====================*/
+	trx_i_s_cache_t*	cache);	/*!< in: cache */
+
+/*******************************************************************//**
+Release a shared/read lock on the tables cache. */
+UNIV_INTERN
+void
+trx_i_s_cache_end_read(
+/*===================*/
+	trx_i_s_cache_t*	cache);	/*!< in: cache */
+
+/*******************************************************************//**
+Issue an exclusive/write lock on the tables cache. */
+UNIV_INTERN
+void
+trx_i_s_cache_start_write(
+/*======================*/
+	trx_i_s_cache_t*	cache);	/*!< in: cache */
+
+/*******************************************************************//**
+Release an exclusive/write lock on the tables cache. */
+UNIV_INTERN
+void
+trx_i_s_cache_end_write(
+/*====================*/
+	trx_i_s_cache_t*	cache);	/*!< in: cache */
+
+
+/*******************************************************************//**
+Retrieves the number of used rows in the cache for a given
+INFORMATION SCHEMA table.
+@return	number of rows */
+UNIV_INTERN
+ulint
+trx_i_s_cache_get_rows_used(
+/*========================*/
+	trx_i_s_cache_t*	cache,	/*!< in: cache */
+	enum i_s_table		table);	/*!< in: which table */
+
+/*******************************************************************//**
+Retrieves the nth row in the cache for a given INFORMATION SCHEMA
+table.
+@return	row */
+UNIV_INTERN
+void*
+trx_i_s_cache_get_nth_row(
+/*======================*/
+	trx_i_s_cache_t*	cache,	/*!< in: cache */
+	enum i_s_table		table,	/*!< in: which table */
+	ulint			n);	/*!< in: row number */
+
+/*******************************************************************//**
+Update the transactions cache if it has not been read for some time.
+@return	0 - fetched, 1 - not */
+UNIV_INTERN
+int
+trx_i_s_possibly_fetch_data_into_cache(
+/*===================================*/
+	trx_i_s_cache_t*	cache);	/*!< in/out: cache */
+
+/*******************************************************************//**
+Returns TRUE if the data in the cache is truncated due to the memory
+limit posed by TRX_I_S_MEM_LIMIT.
+@return	TRUE if truncated */
+UNIV_INTERN
+ibool
+trx_i_s_cache_is_truncated(
+/*=======================*/
+	trx_i_s_cache_t*	cache);	/*!< in: cache */
+
+/** The maximum length of a resulting lock_id in
+trx_i_s_create_lock_id(), not including the terminating NUL.
+":%lu:%lu:%lu" -> 63 chars */
+#define TRX_I_S_LOCK_ID_MAX_LEN	(TRX_ID_MAX_LEN + 63)
+
+/*******************************************************************//**
+Crafts a lock id string from an i_s_locks_row_t object. Returns its
+second argument. This function aborts if there is not enough space in
+lock_id. Be sure to provide at least TRX_I_S_LOCK_ID_MAX_LEN + 1 if you
+want to be 100% sure that it will not abort.
+@return	resulting lock id */
+UNIV_INTERN
+char*
+trx_i_s_create_lock_id(
+/*===================*/
+	const i_s_locks_row_t*	row,	/*!< in: innodb_locks row */
+	char*			lock_id,/*!< out: resulting lock_id */
+	ulint			lock_id_size);/*!< in: size of the lock id
+					buffer */
+
+#endif /* trx0i_s_h */
diff --git a/storage/innobase/include/trx0purge.h b/storage/innobase/include/trx0purge.h
new file mode 100644
index 00000000000..1e13c883800
--- /dev/null
+++ b/storage/innobase/include/trx0purge.h
@@ -0,0 +1,218 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0purge.h
+Purge old versions
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef trx0purge_h
+#define trx0purge_h
+
+#include "univ.i"
+#include "trx0types.h"
+#include "mtr0mtr.h"
+#include "trx0sys.h"
+#include "que0types.h"
+#include "page0page.h"
+#include "usr0sess.h"
+#include "fil0fil.h"
+
+/** The global data structure coordinating a purge */
+extern trx_purge_t*	purge_sys;
+
+/** A dummy undo record used as a return value when we have a whole undo log
+which needs no purge */
+extern trx_undo_rec_t	trx_purge_dummy_rec;
+
+/********************************************************************//**
+Calculates the file address of an undo log header when we have the file
+address of its history list node.
+@return	file address of the log */
+UNIV_INLINE
+fil_addr_t
+trx_purge_get_log_from_hist(
+/*========================*/
+	fil_addr_t	node_addr);	/*!< in: file address of the history
+					list node of the log */
+/********************************************************************//**
+Creates the global purge system control structure and inits the history
+mutex. */
+UNIV_INTERN
+void
+trx_purge_sys_create(
+/*=================*/
+	ulint		n_purge_threads,/*!< in: number of purge threads */
+	ib_bh_t*	ib_bh);		/*!< in/own: UNDO log min binary heap*/
+/********************************************************************//**
+Frees the global purge system control structure. */
+UNIV_INTERN
+void
+trx_purge_sys_close(void);
+/*======================*/
+/********************************************************************//**
+Adds the update undo log as the first log in the history list. Removes the
+update undo log segment from the rseg slot if it is too big for reuse. */
+UNIV_INTERN
+void
+trx_purge_add_update_undo_to_history(
+/*=================================*/
+	trx_t*	trx,		/*!< in: transaction */
+	page_t*	undo_page,	/*!< in: update undo log header page,
+				x-latched */
+	mtr_t*	mtr);		/*!< in: mtr */
+/*******************************************************************//**
+This function runs a purge batch.
+@return	number of undo log pages handled in the batch */
+UNIV_INTERN
+ulint
+trx_purge(
+/*======*/
+	ulint	n_purge_threads,	/*!< in: number of purge tasks to
+					submit to task queue. */
+	ulint	limit,			/*!< in: the maximum number of
+					records to purge in one batch */
+	bool	truncate);		/*!< in: truncate history if true */
+/*******************************************************************//**
+Stop purge and wait for it to stop, move to PURGE_STATE_STOP. */
+UNIV_INTERN
+void
+trx_purge_stop(void);
+/*================*/
+/*******************************************************************//**
+Resume purge, move to PURGE_STATE_RUN. */
+UNIV_INTERN
+void
+trx_purge_run(void);
+/*================*/
+
+/** Purge states, as reported by trx_purge_state() */
+enum purge_state_t {
+	PURGE_STATE_INIT,		/*!< Purge instance created */
+	PURGE_STATE_RUN,		/*!< Purge should be running */
+	PURGE_STATE_STOP,		/*!< Purge should be stopped */
+	PURGE_STATE_EXIT,		/*!< Purge has been shut down */
+	PURGE_STATE_DISABLED		/*!< Purge was never started */
+};
+
+/*******************************************************************//**
+Get the purge state.
+@return purge state. */
+UNIV_INTERN
+purge_state_t
+trx_purge_state(void);
+/*=================*/
+
+/** The purge pointer/iterator: the (transaction number, undo number) pair
+up to which purge has parsed and applied the records. */
+struct purge_iter_t {
+	trx_id_t	trx_no;		/*!< Purge has advanced past all
+					transactions whose number is less
+					than this */
+	undo_no_t	undo_no;	/*!< Purge has advanced past all records
+					whose undo number is less than this */
+};
+
+/** The control structure used in the purge operation */
+struct trx_purge_t{
+	sess_t*		sess;		/*!< System session running the purge
+					query */
+	trx_t*		trx;		/*!< System transaction running the
+					purge query: this trx is not in the
+					trx list of the trx system and it
+					never ends */
+	rw_lock_t	latch;		/*!< The latch protecting the purge
+					view. A purge operation must acquire an
+					x-latch here for the instant at which
+					it changes the purge view: an undo
+					log operation can prevent this by
+					obtaining an s-latch here. It also
+					protects state and running */
+	os_event_t	event;		/*!< State signal event */
+	ulint		n_stop;		/*!< Counter to track number of stops */
+	volatile bool	running;	/*!< true, if purge is active,
+					we check this without the latch too */
+	volatile purge_state_t	state;	/*!< Purge coordinator thread states,
+					we check this in several places
+					without holding the latch. */
+	que_t*		query;		/*!< The query graph which will do the
+					parallelized purge operation */
+	read_view_t*	view;		/*!< The purge will not remove undo logs
+					which are >= this view (purge view) */
+	volatile ulint	n_submitted;	/*!< Count of total tasks submitted
+					to the task queue */
+	volatile ulint	n_completed;	/*!< Count of total tasks completed */
+
+	/*------------------------------*/
+	/* The following two fields form the 'purge pointer' which advances
+	during a purge, and which is used in history list truncation */
+
+	purge_iter_t	iter;		/*!< Limit up to which we have read and
+					parsed the UNDO log records.  Not
+					necessarily purged from the indexes.
+					Note that this can never be less than
+					the limit below, we check for this
+					invariant in trx0purge.cc */
+	purge_iter_t	limit;		/*!< The 'purge pointer' which advances
+					during a purge, and which is used in
+					history list truncation */
+#ifdef UNIV_DEBUG
+	purge_iter_t	done;		/*!< Debug only: the 'purge pointer' up
+					to which purge has actually completed */
+#endif /* UNIV_DEBUG */
+	/*-----------------------------*/
+	ibool		next_stored;	/*!< TRUE if the info of the next record
+					to purge is stored below: if yes, then
+					the transaction number and the undo
+					number of the record are stored in
+					purge_trx_no and purge_undo_no above */
+	trx_rseg_t*	rseg;		/*!< Rollback segment for the next undo
+					record to purge */
+	ulint		page_no;	/*!< Page number for the next undo
+					record to purge, page number of the
+					log header, if dummy record */
+	ulint		offset;		/*!< Page offset for the next undo
+					record to purge, 0 if the dummy
+					record */
+	ulint		hdr_page_no;	/*!< Header page of the undo log where
+					the next record to purge belongs */
+	ulint		hdr_offset;	/*!< Header byte offset on the page */
+	/*-----------------------------*/
+	mem_heap_t*	heap;		/*!< Temporary storage used during a
+					purge: can be emptied after purge
+					completes */
+	/*-----------------------------*/
+	ib_bh_t*	ib_bh;		/*!< Binary min-heap, ordered on
+					rseg_queue_t::trx_no. It is protected
+					by the bh_mutex */
+	ib_mutex_t		bh_mutex;	/*!< Mutex protecting ib_bh */
+};
+
+/** Info required to purge a record */
+struct trx_purge_rec_t {
+	trx_undo_rec_t*	undo_rec;	/*!< Record to purge */
+	roll_ptr_t	roll_ptr;	/*!< File pointer to UNDO record */
+};
+
+#ifndef UNIV_NONINL
+#include "trx0purge.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/trx0purge.ic b/storage/innobase/include/trx0purge.ic
new file mode 100644
index 00000000000..ca9cc1fb894
--- /dev/null
+++ b/storage/innobase/include/trx0purge.ic
@@ -0,0 +1,62 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0purge.ic
+Purge old versions
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "trx0undo.h"
+
+/********************************************************************//**
+Calculates the file address of an undo log header from the file address of
+its history list node, by subtracting TRX_UNDO_HISTORY_NODE from boffset.
+@return	file address of the log */
+UNIV_INLINE
+fil_addr_t
+trx_purge_get_log_from_hist(
+/*========================*/
+	fil_addr_t	node_addr)	/*!< in: file address of the history
+					list node of the log */
+{
+	node_addr.boffset -= TRX_UNDO_HISTORY_NODE;
+
+	return(node_addr);
+}
+
+#ifdef UNIV_DEBUG
+/********************************************************************//**
+Asserts purge_sys_t::limit <= purge_sys_t::iter (by trx_no, then undo_no).
+@return	always TRUE; a violated invariant trips a ut_ad() assertion */
+UNIV_INLINE
+ibool
+trx_purge_check_limit(void)
+/*=======================*/
+{
+	ut_ad(purge_sys->limit.trx_no <= purge_sys->iter.trx_no);
+
+	if (purge_sys->limit.trx_no == purge_sys->iter.trx_no) {
+		ut_ad(purge_sys->limit.undo_no <= purge_sys->iter.undo_no);
+	}
+
+	return(TRUE);
+}
+#endif /* UNIV_DEBUG */
+
diff --git a/storage/innobase/include/trx0rec.h b/storage/innobase/include/trx0rec.h
new file mode 100644
index 00000000000..50da55d2ea3
--- /dev/null
+++ b/storage/innobase/include/trx0rec.h
@@ -0,0 +1,326 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0rec.h
+Transaction undo log record
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef trx0rec_h
+#define trx0rec_h
+
+#include "univ.i"
+#include "trx0types.h"
+#include "row0types.h"
+#include "mtr0mtr.h"
+#include "dict0types.h"
+#include "data0data.h"
+#include "rem0types.h"
+
+#ifndef UNIV_HOTBACKUP
+# include "que0types.h"
+
+/***********************************************************************//**
+Copies the undo record to the heap.
+@return	own: copy of undo log record */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_rec_copy(
+/*==============*/
+	const trx_undo_rec_t*	undo_rec,	/*!< in: undo log record */
+	mem_heap_t*		heap);		/*!< in: heap where copied */
+/**********************************************************************//**
+Reads the undo log record type.
+@return	record type */
+UNIV_INLINE
+ulint
+trx_undo_rec_get_type(
+/*==================*/
+	const trx_undo_rec_t*	undo_rec);	/*!< in: undo log record */
+/**********************************************************************//**
+Reads from an undo log record the record compiler info.
+@return	compiler info */
+UNIV_INLINE
+ulint
+trx_undo_rec_get_cmpl_info(
+/*=======================*/
+	const trx_undo_rec_t*	undo_rec);	/*!< in: undo log record */
+/**********************************************************************//**
+Returns TRUE if an undo log record contains an extern storage field.
+@return	TRUE if extern */
+UNIV_INLINE
+ibool
+trx_undo_rec_get_extern_storage(
+/*============================*/
+	const trx_undo_rec_t*	undo_rec);	/*!< in: undo log record */
+/**********************************************************************//**
+Reads the undo log record number.
+@return	undo no */
+UNIV_INLINE
+undo_no_t
+trx_undo_rec_get_undo_no(
+/*=====================*/
+	const trx_undo_rec_t*	undo_rec);	/*!< in: undo log record */
+/**********************************************************************//**
+Returns the start of the undo record data area.
+@return	offset to the data area */
+UNIV_INLINE
+ulint
+trx_undo_rec_get_offset(
+/*====================*/
+	undo_no_t	undo_no)	/*!< in: undo no read from node */
+	__attribute__((const));
+
+/**********************************************************************//**
+Returns the start of the undo record data area. */
+#define trx_undo_rec_get_ptr(undo_rec, undo_no)		\
+	((undo_rec) + trx_undo_rec_get_offset(undo_no))
+
+/**********************************************************************//**
+Reads from an undo log record the general parameters.
+@return	remaining part of undo log record after reading these values */
+UNIV_INTERN
+byte*
+trx_undo_rec_get_pars(
+/*==================*/
+	trx_undo_rec_t*	undo_rec,	/*!< in: undo log record */
+	ulint*		type,		/*!< out: undo record type:
+					TRX_UNDO_INSERT_REC, ... */
+	ulint*		cmpl_info,	/*!< out: compiler info, relevant only
+					for update type records */
+	bool*		updated_extern,	/*!< out: true if we updated an
+					externally stored field */
+	undo_no_t*	undo_no,	/*!< out: undo log record number */
+	table_id_t*	table_id)	/*!< out: table id */
+	__attribute__((nonnull));
+/*******************************************************************//**
+Builds a row reference from an undo log record.
+@return	pointer to remaining part of undo record */
+UNIV_INTERN
+byte*
+trx_undo_rec_get_row_ref(
+/*=====================*/
+	byte*		ptr,	/*!< in: remaining part of a copy of an undo log
+				record, at the start of the row reference;
+				NOTE that this copy of the undo log record must
+				be preserved as long as the row reference is
+				used, as we do NOT copy the data in the
+				record! */
+	dict_index_t*	index,	/*!< in: clustered index */
+	dtuple_t**	ref,	/*!< out, own: row reference */
+	mem_heap_t*	heap);	/*!< in: memory heap from which the memory
+				needed is allocated */
+/*******************************************************************//**
+Skips a row reference from an undo log record.
+@return	pointer to remaining part of undo record */
+UNIV_INTERN
+byte*
+trx_undo_rec_skip_row_ref(
+/*======================*/
+	byte*		ptr,	/*!< in: remaining part in update undo log
+				record, at the start of the row reference */
+	dict_index_t*	index);	/*!< in: clustered index */
+/**********************************************************************//**
+Reads from an undo log update record the system field values of the old
+version.
+@return	remaining part of undo log record after reading these values */
+UNIV_INTERN
+byte*
+trx_undo_update_rec_get_sys_cols(
+/*=============================*/
+	byte*		ptr,		/*!< in: remaining part of undo
+					log record after reading
+					general parameters */
+	trx_id_t*	trx_id,		/*!< out: trx id */
+	roll_ptr_t*	roll_ptr,	/*!< out: roll ptr */
+	ulint*		info_bits);	/*!< out: info bits state */
+/*******************************************************************//**
+Builds an update vector based on a remaining part of an undo log record.
+@return remaining part of the record, NULL if an error is detected, which
+means that the record is corrupted */
+UNIV_INTERN
+byte*
+trx_undo_update_rec_get_update(
+/*===========================*/
+	byte*		ptr,	/*!< in: remaining part in update undo log
+				record, after reading the row reference;
+				NOTE that this copy of the undo log record must
+				be preserved as long as the update vector is
+				used, as we do NOT copy the data in the
+				record! */
+	dict_index_t*	index,	/*!< in: clustered index */
+	ulint		type,	/*!< in: TRX_UNDO_UPD_EXIST_REC,
+				TRX_UNDO_UPD_DEL_REC, or
+				TRX_UNDO_DEL_MARK_REC; in the last case,
+				only trx id and roll ptr fields are added to
+				the update vector */
+	trx_id_t	trx_id,	/*!< in: transaction id from this undo record */
+	roll_ptr_t	roll_ptr,/*!< in: roll pointer from this undo record */
+	ulint		info_bits,/*!< in: info bits from this undo record */
+	trx_t*		trx,	/*!< in: transaction */
+	mem_heap_t*	heap,	/*!< in: memory heap from which the memory
+				needed is allocated */
+	upd_t**		upd);	/*!< out, own: update vector */
+/*******************************************************************//**
+Builds a partial row from an update undo log record, for purge.
+It contains the columns which occur as ordering in any index of the table.
+Any missing columns are indicated by col->mtype == DATA_MISSING.
+@return	pointer to remaining part of undo record */
+UNIV_INTERN
+byte*
+trx_undo_rec_get_partial_row(
+/*=========================*/
+	byte*		ptr,	/*!< in: remaining part in update undo log
+				record of a suitable type, at the start of
+				the stored index columns;
+				NOTE that this copy of the undo log record must
+				be preserved as long as the partial row is
+				used, as we do NOT copy the data in the
+				record! */
+	dict_index_t*	index,	/*!< in: clustered index */
+	dtuple_t**	row,	/*!< out, own: partial row */
+	ibool		ignore_prefix, /*!< in: flag to indicate if we
+				expect blob prefixes in undo. Used
+				only in the assertion. */
+	mem_heap_t*	heap)	/*!< in: memory heap from which the memory
+				needed is allocated */
+	__attribute__((nonnull, warn_unused_result));
+/***********************************************************************//**
+Writes information to an undo log about an insert, update, or a delete marking
+of a clustered index record. This information is used in a rollback of the
+transaction and in consistent reads that must look to the history of this
+transaction.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+trx_undo_report_row_operation(
+/*==========================*/
+	ulint		flags,		/*!< in: if BTR_NO_UNDO_LOG_FLAG bit is
+					set, does nothing */
+	ulint		op_type,	/*!< in: TRX_UNDO_INSERT_OP or
+					TRX_UNDO_MODIFY_OP */
+	que_thr_t*	thr,		/*!< in: query thread */
+	dict_index_t*	index,		/*!< in: clustered index */
+	const dtuple_t*	clust_entry,	/*!< in: in the case of an insert,
+					index entry to insert into the
+					clustered index, otherwise NULL */
+	const upd_t*	update,		/*!< in: in the case of an update,
+					the update vector, otherwise NULL */
+	ulint		cmpl_info,	/*!< in: compiler info on secondary
+					index updates */
+	const rec_t*	rec,		/*!< in: case of an update or delete
+					marking, the record in the clustered
+					index, otherwise NULL */
+	const ulint*	offsets,	/*!< in: rec_get_offsets(rec) */
+	roll_ptr_t*	roll_ptr)	/*!< out: rollback pointer to the
+					inserted undo log record,
+					0 if BTR_NO_UNDO_LOG
+					flag was specified */
+	__attribute__((nonnull(3,4,10), warn_unused_result));
+/******************************************************************//**
+Copies an undo record to heap. This function can be called if we know that
+the undo log record exists.
+@return	own: copy of the record */
+UNIV_INTERN
+trx_undo_rec_t*
+trx_undo_get_undo_rec_low(
+/*======================*/
+	roll_ptr_t	roll_ptr,	/*!< in: roll pointer to record */
+	mem_heap_t*	heap)		/*!< in: memory heap where copied */
+	__attribute__((nonnull, warn_unused_result));
+/*******************************************************************//**
+Build a previous version of a clustered index record. The caller must
+hold a latch on the index page of the clustered index record.
+@retval true if previous version was built, or if it was an insert
+or the table has been rebuilt
+@retval false if the previous version is earlier than purge_view,
+which means that it may have been removed */
+UNIV_INTERN
+bool
+trx_undo_prev_version_build(
+/*========================*/
+	const rec_t*	index_rec,/*!< in: clustered index record in the
+				index tree */
+	mtr_t*		index_mtr,/*!< in: mtr which contains the latch to
+				index_rec page and purge_view */
+	const rec_t*	rec,	/*!< in: version of a clustered index record */
+	dict_index_t*	index,	/*!< in: clustered index */
+	ulint*		offsets,/*!< in/out: rec_get_offsets(rec, index) */
+	mem_heap_t*	heap,	/*!< in: memory heap from which the memory
+				needed is allocated */
+	rec_t**		old_vers)/*!< out, own: previous version, or NULL if
+				rec is the first inserted version, or if
+				history data has been deleted */
+	__attribute__((nonnull));
+#endif /* !UNIV_HOTBACKUP */
+/***********************************************************//**
+Parses a redo log record of adding an undo log record.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+trx_undo_parse_add_undo_rec(
+/*========================*/
+	byte*	ptr,	/*!< in: buffer */
+	byte*	end_ptr,/*!< in: buffer end */
+	page_t*	page);	/*!< in: page or NULL */
+/***********************************************************//**
+Parses a redo log record of erasing of an undo page end.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+trx_undo_parse_erase_page_end(
+/*==========================*/
+	byte*	ptr,	/*!< in: buffer */
+	byte*	end_ptr,/*!< in: buffer end */
+	page_t*	page,	/*!< in: page or NULL */
+	mtr_t*	mtr);	/*!< in: mtr or NULL */
+
+#ifndef UNIV_HOTBACKUP
+
+/* Types of an undo log record: these have to be smaller than 16, as the
+compilation info multiplied by 16 is ORed to this value in an undo log
+record */
+
+#define	TRX_UNDO_INSERT_REC	11	/* fresh insert into clustered index */
+#define	TRX_UNDO_UPD_EXIST_REC	12	/* update of a non-delete-marked
+					record */
+#define	TRX_UNDO_UPD_DEL_REC	13	/* update of a delete marked record to
+					a not delete marked record; also the
+					fields of the record can change */
+#define	TRX_UNDO_DEL_MARK_REC	14	/* delete marking of a record; fields
+					do not change */
+#define	TRX_UNDO_CMPL_INFO_MULT	16	/* compilation info is multiplied by
+					this and ORed to the type above */
+#define	TRX_UNDO_UPD_EXTERN	128	/* This bit can be ORed to type_cmpl
+					to denote that we updated external
+					storage fields: used by purge to
+					free the external storage */
+
+/* Operation type flags used in trx_undo_report_row_operation */
+#define	TRX_UNDO_INSERT_OP		1
+#define	TRX_UNDO_MODIFY_OP		2
+
+#ifndef UNIV_NONINL
+#include "trx0rec.ic"
+#endif
+
+#endif /* !UNIV_HOTBACKUP */
+
+#endif /* trx0rec_h */
diff --git a/storage/innobase/include/trx0rec.ic b/storage/innobase/include/trx0rec.ic
new file mode 100644
index 00000000000..08704f6b821
--- /dev/null
+++ b/storage/innobase/include/trx0rec.ic
@@ -0,0 +1,113 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0rec.ic
+Transaction undo log record
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Reads the record type from the byte at offset 2 of an undo log record.
+@return	record type, i.e. that byte masked with TRX_UNDO_CMPL_INFO_MULT - 1 */
+UNIV_INLINE
+ulint
+trx_undo_rec_get_type(
+/*==================*/
+	const trx_undo_rec_t*	undo_rec)	/*!< in: undo log record */
+{
+	return(mach_read_from_1(undo_rec + 2) & (TRX_UNDO_CMPL_INFO_MULT - 1));
+}
+
+/**********************************************************************//**
+Reads the compiler info: the byte at offset 2 / TRX_UNDO_CMPL_INFO_MULT.
+@return	compiler info; NOTE(review): TRX_UNDO_UPD_EXTERN is not masked off */
+UNIV_INLINE
+ulint
+trx_undo_rec_get_cmpl_info(
+/*=======================*/
+	const trx_undo_rec_t*	undo_rec)	/*!< in: undo log record */
+{
+	return(mach_read_from_1(undo_rec + 2) / TRX_UNDO_CMPL_INFO_MULT);
+}
+
+/**********************************************************************//**
+Tells whether the TRX_UNDO_UPD_EXTERN bit is set in the byte at offset 2.
+@return	TRUE if the extern flag is set, FALSE otherwise */
+UNIV_INLINE
+ibool
+trx_undo_rec_get_extern_storage(
+/*============================*/
+	const trx_undo_rec_t*	undo_rec)	/*!< in: undo log record */
+{
+	if (mach_read_from_1(undo_rec + 2) & TRX_UNDO_UPD_EXTERN) {
+
+		return(TRUE);
+	}
+
+	return(FALSE);
+}
+
+/**********************************************************************//**
+Reads the undo log record number, stored much-compressed at offset 3.
+@return	undo no */
+UNIV_INLINE
+undo_no_t
+trx_undo_rec_get_undo_no(
+/*=====================*/
+	const trx_undo_rec_t*	undo_rec)	/*!< in: undo log record */
+{
+	const byte*	ptr;
+
+	ptr = undo_rec + 3;
+
+	return(mach_ull_read_much_compressed(ptr));
+}
+
+/**********************************************************************//**
+Returns the offset at which the undo record data area starts.
+@return	offset to the data area: 3 + much-compressed size of undo_no */
+UNIV_INLINE
+ulint
+trx_undo_rec_get_offset(
+/*====================*/
+	undo_no_t	undo_no)	/*!< in: undo no read from node */
+{
+	return(3 + mach_ull_get_much_compressed_size(undo_no));
+}
+
+/***********************************************************************//**
+Copies the undo record to the heap with mem_heap_dup().
+@return	own: copy of undo log record */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_rec_copy(
+/*==============*/
+	const trx_undo_rec_t*	undo_rec,	/*!< in: undo log record */
+	mem_heap_t*		heap)		/*!< in: heap where copied */
+{
+	ulint		len;
+	/* First 2 bytes (presumably the end offset in the page) minus own offset */
+	len = mach_read_from_2(undo_rec)
+		- ut_align_offset(undo_rec, UNIV_PAGE_SIZE);
+	ut_ad(len < UNIV_PAGE_SIZE);
+	return((trx_undo_rec_t*) mem_heap_dup(heap, undo_rec, len));
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/trx0roll.h b/storage/innobase/include/trx0roll.h
new file mode 100644
index 00000000000..d5ab83d7767
--- /dev/null
+++ b/storage/innobase/include/trx0roll.h
@@ -0,0 +1,297 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0roll.h
+Transaction rollback
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef trx0roll_h
+#define trx0roll_h
+
+#include "univ.i"
+#include "trx0trx.h"
+#include "trx0types.h"
+#include "mtr0mtr.h"
+#include "trx0sys.h"
+
+extern bool	trx_rollback_or_clean_is_active;
+
+/*******************************************************************//**
+Determines if this transaction is rolling back an incomplete transaction
+in crash recovery.
+@return TRUE if trx is an incomplete transaction that is being rolled
+back in crash recovery */
+UNIV_INTERN
+ibool
+trx_is_recv(
+/*========*/
+	const trx_t*	trx);	/*!< in: transaction */
+/*******************************************************************//**
+Returns a transaction savepoint taken at this point in time.
+@return	savepoint */
+UNIV_INTERN
+trx_savept_t
+trx_savept_take(
+/*============*/
+	trx_t*	trx);	/*!< in: transaction */
+/*******************************************************************//**
+Frees an undo number array. */
+UNIV_INTERN
+void
+trx_undo_arr_free(
+/*==============*/
+	trx_undo_arr_t*	arr);	/*!< in: undo number array */
+/*******************************************************************//**
+Returns pointer to nth element in an undo number array.
+@return	pointer to the nth element */
+UNIV_INLINE
+trx_undo_inf_t*
+trx_undo_arr_get_nth_info(
+/*======================*/
+	trx_undo_arr_t*	arr,	/*!< in: undo number array */
+	ulint		n);	/*!< in: position */
+/********************************************************************//**
+Pops the topmost record when the two undo logs of a transaction are seen
+as a single stack of records ordered by their undo numbers. Inserts the
+undo number of the popped undo record to the array of currently processed
+undo numbers in the transaction. When the query thread finishes processing
+of this undo record, it must be released with trx_undo_rec_release.
+@return undo log record copied to heap, NULL if none left, or if the
+undo number of the top record would be less than the limit */
+UNIV_INTERN
+trx_undo_rec_t*
+trx_roll_pop_top_rec_of_trx(
+/*========================*/
+	trx_t*		trx,	/*!< in: transaction */
+	undo_no_t	limit,	/*!< in: least undo number we need */
+	roll_ptr_t*	roll_ptr,/*!< out: roll pointer to undo record */
+	mem_heap_t*	heap);	/*!< in: memory heap where copied */
+/********************************************************************//**
+Reserves an undo log record for a query thread to undo. This should be
+called if the query thread gets the undo log record not using the pop
+function above.
+@return	TRUE if succeeded */
+UNIV_INTERN
+ibool
+trx_undo_rec_reserve(
+/*=================*/
+	trx_t*		trx,	/*!< in/out: transaction */
+	undo_no_t	undo_no);/*!< in: undo number of the record */
+/*******************************************************************//**
+Releases a reserved undo record. */
+UNIV_INTERN
+void
+trx_undo_rec_release(
+/*=================*/
+	trx_t*		trx,	/*!< in/out: transaction */
+	undo_no_t	undo_no);/*!< in: undo number */
+/*******************************************************************//**
+Rollback or clean up any incomplete transactions which were
+encountered in crash recovery.  If the transaction already was
+committed, then we clean up a possible insert undo log. If the
+transaction was not yet committed, then we roll it back. */
+UNIV_INTERN
+void
+trx_rollback_or_clean_recovered(
+/*============================*/
+	ibool	all);	/*!< in: FALSE=roll back dictionary transactions;
+			TRUE=roll back all non-PREPARED transactions */
+/*******************************************************************//**
+Rollback or clean up any incomplete transactions which were
+encountered in crash recovery.  If the transaction already was
+committed, then we clean up a possible insert undo log. If the
+transaction was not yet committed, then we roll it back.
+Note: this is done in a background thread.
+@return	a dummy parameter */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(trx_rollback_or_clean_all_recovered)(
+/*================================================*/
+	void*	arg __attribute__((unused)));
+			/*!< in: a dummy parameter required by
+			os_thread_create */
+/*********************************************************************//**
+Creates a rollback command node struct.
+@return	own: rollback node struct */
+UNIV_INTERN
+roll_node_t*
+roll_node_create(
+/*=============*/
+	mem_heap_t*	heap);	/*!< in: mem heap where created */
+/***********************************************************//**
+Performs an execution step for a rollback command node in a query graph.
+@return	query thread to run next, or NULL */
+UNIV_INTERN
+que_thr_t*
+trx_rollback_step(
+/*==============*/
+	que_thr_t*	thr);	/*!< in: query thread */
+/*******************************************************************//**
+Rollback a transaction used in MySQL.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+trx_rollback_for_mysql(
+/*===================*/
+	trx_t*	trx)	/*!< in/out: transaction */
+	__attribute__((nonnull));
+/*******************************************************************//**
+Rollback the latest SQL statement for MySQL.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+trx_rollback_last_sql_stat_for_mysql(
+/*=================================*/
+	trx_t*	trx)	/*!< in/out: transaction */
+	__attribute__((nonnull));
+/*******************************************************************//**
+Rollback a transaction to a given savepoint or do a complete rollback.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+trx_rollback_to_savepoint(
+/*======================*/
+	trx_t*		trx,	/*!< in: transaction handle */
+	trx_savept_t*	savept)	/*!< in: pointer to savepoint undo number, if
+				partial rollback requested, or NULL for
+				complete rollback */
+	__attribute__((nonnull(1)));
+/*******************************************************************//**
+Rolls a transaction back to a named savepoint. Modifications after the
+savepoint are undone but InnoDB does NOT release the corresponding locks
+which are stored in memory. If a lock is 'implicit', that is, a new inserted
+row holds a lock where the lock information is carried by the trx id stored in
+the row, these locks are naturally released in the rollback. Savepoints which
+were set after this savepoint are deleted.
+@return if no savepoint of the name found then DB_NO_SAVEPOINT,
+otherwise DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+trx_rollback_to_savepoint_for_mysql(
+/*================================*/
+	trx_t*		trx,			/*!< in: transaction handle */
+	const char*	savepoint_name,		/*!< in: savepoint name */
+	ib_int64_t*	mysql_binlog_cache_pos)	/*!< out: the MySQL binlog cache
+						position corresponding to this
+						savepoint; MySQL needs this
+						information to remove the
+						binlog entries of the queries
+						executed after the savepoint */
+	__attribute__((nonnull, warn_unused_result));
+/*******************************************************************//**
+Creates a named savepoint. If the transaction is not yet started, starts it.
+If there is already a savepoint of the same name, this call erases that old
+savepoint and replaces it with a new. Savepoints are deleted in a transaction
+commit or rollback.
+@return	always DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+trx_savepoint_for_mysql(
+/*====================*/
+	trx_t*		trx,			/*!< in: transaction handle */
+	const char*	savepoint_name,		/*!< in: savepoint name */
+	ib_int64_t	binlog_cache_pos)	/*!< in: MySQL binlog cache
+						position corresponding to this
+						connection at the time of the
+						savepoint */
+	__attribute__((nonnull));
+/*******************************************************************//**
+Releases a named savepoint. Savepoints which
+were set after this savepoint are deleted.
+@return if no savepoint of the name found then DB_NO_SAVEPOINT,
+otherwise DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+trx_release_savepoint_for_mysql(
+/*============================*/
+	trx_t*		trx,			/*!< in: transaction handle */
+	const char*	savepoint_name)		/*!< in: savepoint name */
+	__attribute__((nonnull, warn_unused_result));
+/*******************************************************************//**
+Frees savepoint structs starting from savep. */
+UNIV_INTERN
+void
+trx_roll_savepoints_free(
+/*=====================*/
+	trx_t*			trx,	/*!< in: transaction handle */
+	trx_named_savept_t*	savep);	/*!< in: free all savepoints > this one;
+					if this is NULL, free all savepoints
+					of trx */
+
+/** A cell of trx_undo_arr_t; used during a rollback and a purge */
+struct	trx_undo_inf_t{
+	ibool		in_use;	/*!< true if cell is being used */
+	trx_id_t	trx_no;	/*!< transaction number: not defined during
+				a rollback */
+	undo_no_t	undo_no;/*!< undo number of an undo record */
+};
+
+/** During a rollback and a purge, undo numbers of undo records currently being
+processed are stored in this array */
+
+struct trx_undo_arr_t{
+	ulint		n_cells;	/*!< number of cells in the array */
+	ulint		n_used;		/*!< number of cells in use */
+	trx_undo_inf_t*	infos;		/*!< the array of undo infos */
+	mem_heap_t*	heap;		/*!< memory heap from which allocated */
+};
+
+/** Rollback node states */
+enum roll_node_state {
+	ROLL_NODE_NONE = 0,		/*!< Unknown state */
+	ROLL_NODE_SEND,			/*!< about to send a rollback signal to
+					the transaction */
+	ROLL_NODE_WAIT			/*!< rollback signal sent to the
+					transaction, waiting for completion */
+};
+
+/** Rollback command node in a query graph */
+struct roll_node_t{
+	que_common_t		common;	/*!< node type: QUE_NODE_ROLLBACK */
+	enum roll_node_state	state;	/*!< node execution state */
+	ibool			partial;/*!< TRUE if we want a partial
+					rollback */
+	trx_savept_t		savept;	/*!< savepoint to which to
+					roll back, in the case of a
+					partial rollback */
+	que_thr_t*		undo_thr;/*!< undo query graph */
+};
+
+/** A savepoint set with SQL's "SAVEPOINT savepoint_id" command */
+struct trx_named_savept_t{
+	char*		name;		/*!< savepoint name */
+	trx_savept_t	savept;		/*!< the undo number corresponding to
+					the savepoint */
+	ib_int64_t	mysql_binlog_cache_pos;
+					/*!< the MySQL binlog cache position
+					corresponding to this savepoint, not
+					defined if the MySQL binlogging is not
+					enabled */
+	UT_LIST_NODE_T(trx_named_savept_t)
+			trx_savepoints;	/*!< the list of savepoints of a
+					transaction */
+};
+
+#ifndef UNIV_NONINL
+#include "trx0roll.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/trx0roll.ic b/storage/innobase/include/trx0roll.ic
new file mode 100644
index 00000000000..178e9bb730a
--- /dev/null
+++ b/storage/innobase/include/trx0roll.ic
@@ -0,0 +1,40 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0roll.ic
+Transaction rollback
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+/*******************************************************************//**
+Returns a pointer to the nth cell of an undo number array.
+@return	pointer to arr->infos[n] */
+UNIV_INLINE
+trx_undo_inf_t*
+trx_undo_arr_get_nth_info(
+/*======================*/
+	trx_undo_arr_t*	arr,	/*!< in: undo number array */
+	ulint		n)	/*!< in: position, < arr->n_cells */
+{
+	ut_ad(arr);
+	ut_ad(n < arr->n_cells);
+
+	return(arr->infos + n);
+}
diff --git a/storage/innobase/include/trx0rseg.h b/storage/innobase/include/trx0rseg.h
new file mode 100644
index 00000000000..185b05876b4
--- /dev/null
+++ b/storage/innobase/include/trx0rseg.h
@@ -0,0 +1,230 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0rseg.h
+Rollback segment
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef trx0rseg_h
+#define trx0rseg_h
+
+#include "univ.i"
+#include "trx0types.h"
+#include "trx0sys.h"
+#include "ut0bh.h"
+
+/******************************************************************//**
+Gets a rollback segment header.
+@return	rollback segment header, page x-latched */
+UNIV_INLINE
+trx_rsegf_t*
+trx_rsegf_get(
+/*==========*/
+	ulint	space,		/*!< in: space where placed */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	page_no,	/*!< in: page number of the header */
+	mtr_t*	mtr);		/*!< in: mtr */
+/******************************************************************//**
+Gets a newly created rollback segment header.
+@return	rollback segment header, page x-latched */
+UNIV_INLINE
+trx_rsegf_t*
+trx_rsegf_get_new(
+/*==============*/
+	ulint	space,		/*!< in: space where placed */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	page_no,	/*!< in: page number of the header */
+	mtr_t*	mtr);		/*!< in: mtr */
+/***************************************************************//**
+Gets the file page number of the nth undo log slot.
+@return	page number of the undo log segment */
+UNIV_INLINE
+ulint
+trx_rsegf_get_nth_undo(
+/*===================*/
+	trx_rsegf_t*	rsegf,	/*!< in: rollback segment header */
+	ulint		n,	/*!< in: index of slot */
+	mtr_t*		mtr);	/*!< in: mtr */
+/***************************************************************//**
+Sets the file page number of the nth undo log slot. */
+UNIV_INLINE
+void
+trx_rsegf_set_nth_undo(
+/*===================*/
+	trx_rsegf_t*	rsegf,	/*!< in: rollback segment header */
+	ulint		n,	/*!< in: index of slot */
+	ulint		page_no,/*!< in: page number of the undo log segment */
+	mtr_t*		mtr);	/*!< in: mtr */
+/****************************************************************//**
+Looks for a free slot for an undo log segment.
+@return	slot index or ULINT_UNDEFINED if not found */
+UNIV_INLINE
+ulint
+trx_rsegf_undo_find_free(
+/*=====================*/
+	trx_rsegf_t*	rsegf,	/*!< in: rollback segment header */
+	mtr_t*		mtr);	/*!< in: mtr */
+/******************************************************************//**
+Looks for a rollback segment, based on the rollback segment id.
+@return	rollback segment */
+UNIV_INLINE
+trx_rseg_t*
+trx_rseg_get_on_id(
+/*===============*/
+	ulint	id);		/*!< in: rollback segment id */
+/****************************************************************//**
+Creates a rollback segment header. This function is called only when
+a new rollback segment is created in the database.
+@return	page number of the created segment, FIL_NULL if fail */
+UNIV_INTERN
+ulint
+trx_rseg_header_create(
+/*===================*/
+	ulint	space,		/*!< in: space id */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	max_size,	/*!< in: max size in pages */
+	ulint	rseg_slot_no,	/*!< in: rseg id == slot number in trx sys */
+	mtr_t*	mtr);		/*!< in: mtr */
+/*********************************************************************//**
+Creates the memory copies for rollback segments and initializes the
+rseg array in trx_sys at a database startup. */
+UNIV_INTERN
+void
+trx_rseg_array_init(
+/*================*/
+	trx_sysf_t*	sys_header,	/*!< in/out: trx system header */
+	ib_bh_t*	ib_bh,		/*!< in: rseg queue */
+	mtr_t*		mtr);		/*!< in/out: mtr */
+/*********************************************************************//**
+Frees an instance of the rollback segment in memory. */
+UNIV_INTERN
+void
+trx_rseg_mem_free(
+/*==============*/
+	trx_rseg_t*	rseg);		/*!< in, own: instance to free */
+
+/*****************************************************************//**
+Creates a rollback segment. */
+UNIV_INTERN
+trx_rseg_t*
+trx_rseg_create(
+/*============*/
+	ulint	space);			/*!< in: id of UNDO tablespace */
+
+/****************************************************************//**
+Gets the number of unique rollback tablespaces in use except space id 0.
+The last space id will be the sentinel value ULINT_UNDEFINED. The array
+will be sorted on space id. Note: space_ids should have space for
+TRX_SYS_N_RSEGS + 1 elements.
+@return number of unique rollback tablespaces in use. */
+UNIV_INTERN
+ulint
+trx_rseg_get_n_undo_tablespaces(
+/*============================*/
+	ulint*		space_ids);	/*!< out: array of space ids of
+					UNDO tablespaces */
+/* Number of undo log slots in a rollback segment file copy */
+#define TRX_RSEG_N_SLOTS	(UNIV_PAGE_SIZE / 16)
+
+/* Maximum number of transactions supported by a single rollback segment */
+#define TRX_RSEG_MAX_N_TRXS	(TRX_RSEG_N_SLOTS / 2)
+
+/** The rollback segment memory object */
+struct trx_rseg_t{
+	/*--------------------------------------------------------*/
+	ulint		id;	/*!< rollback segment id == the index of
+				its slot in the trx system file copy */
+	ib_mutex_t		mutex;	/*!< mutex protecting the fields in this
+				struct except id, which is constant */
+	ulint		space;	/*!< space where the rollback segment
+				header is placed */
+	ulint		zip_size;/*!< compressed page size of space
+				in bytes, or 0 for uncompressed spaces */
+	ulint		page_no;/*!< page number of the rollback segment
+				header */
+	ulint		max_size;/*!< maximum allowed size in pages */
+	ulint		curr_size;/*!< current size in pages */
+	/*--------------------------------------------------------*/
+	/* Fields for update undo logs */
+	UT_LIST_BASE_NODE_T(trx_undo_t) update_undo_list;
+					/*!< List of update undo logs */
+	UT_LIST_BASE_NODE_T(trx_undo_t) update_undo_cached;
+					/*!< List of update undo log segments
+					cached for fast reuse */
+	/*--------------------------------------------------------*/
+	/* Fields for insert undo logs */
+	UT_LIST_BASE_NODE_T(trx_undo_t) insert_undo_list;
+					/*!< List of insert undo logs */
+	UT_LIST_BASE_NODE_T(trx_undo_t) insert_undo_cached;
+					/*!< List of insert undo log segments
+					cached for fast reuse */
+	/*--------------------------------------------------------*/
+	ulint		last_page_no;	/*!< Page number of the last not yet
+					purged log header in the history list;
+					FIL_NULL if all list purged */
+	ulint		last_offset;	/*!< Byte offset of the last not yet
+					purged log header */
+	trx_id_t	last_trx_no;	/*!< Transaction number of the last not
+					yet purged log */
+	ibool		last_del_marks;	/*!< TRUE if the last not yet purged log
+					needs purging */
+};
+
+/** For prioritising the rollback segments for purge. */
+struct rseg_queue_t {
+        trx_id_t	trx_no;         /*!< trx_rseg_t::last_trx_no */
+        trx_rseg_t*     rseg;           /*!< Rollback segment */
+};
+
+/* Undo log segment slot in a rollback segment header */
+/*-------------------------------------------------------------*/
+#define	TRX_RSEG_SLOT_PAGE_NO	0	/* Page number of the header page of
+					an undo log segment */
+/*-------------------------------------------------------------*/
+/* Slot size */
+#define TRX_RSEG_SLOT_SIZE	4
+
+/* The offset of the rollback segment header on its page */
+#define	TRX_RSEG		FSEG_PAGE_DATA
+
+/* Transaction rollback segment header */
+/*-------------------------------------------------------------*/
+#define	TRX_RSEG_MAX_SIZE	0	/* Maximum allowed size for rollback
+					segment in pages */
+#define	TRX_RSEG_HISTORY_SIZE	4	/* Number of file pages occupied
+					by the logs in the history list */
+#define	TRX_RSEG_HISTORY	8	/* The update undo logs for committed
+					transactions */
+#define	TRX_RSEG_FSEG_HEADER	(8 + FLST_BASE_NODE_SIZE)
+					/* Header for the file segment where
+					this page is placed */
+#define TRX_RSEG_UNDO_SLOTS	(8 + FLST_BASE_NODE_SIZE + FSEG_HEADER_SIZE)
+					/* Undo log segment slots */
+/*-------------------------------------------------------------*/
+
+#ifndef UNIV_NONINL
+#include "trx0rseg.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/trx0rseg.ic b/storage/innobase/include/trx0rseg.ic
new file mode 100644
index 00000000000..30743da9b8c
--- /dev/null
+++ b/storage/innobase/include/trx0rseg.ic
@@ -0,0 +1,167 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0rseg.ic
+Rollback segment
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "srv0srv.h"
+#include "mtr0log.h"
+#include "trx0sys.h"
+
+/******************************************************************//**
+Gets a rollback segment header, x-latching its page in the mtr.
+@return	rollback segment header, page x-latched */
+UNIV_INLINE
+trx_rsegf_t*
+trx_rsegf_get(
+/*==========*/
+	ulint	space,		/*!< in: space where placed */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	page_no,	/*!< in: page number of the header */
+	mtr_t*	mtr)		/*!< in: mtr */
+{
+	buf_block_t*	block;
+	trx_rsegf_t*	header;
+
+	block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr);
+	buf_block_dbg_add_level(block, SYNC_RSEG_HEADER);
+
+	header = TRX_RSEG + buf_block_get_frame(block);
+
+	return(header);
+}
+
+/******************************************************************//**
+Gets a newly created rollback segment header, x-latching its page.
+@return	rollback segment header, page x-latched */
+UNIV_INLINE
+trx_rsegf_t*
+trx_rsegf_get_new(
+/*==============*/
+	ulint	space,		/*!< in: space where placed */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	page_no,	/*!< in: page number of the header */
+	mtr_t*	mtr)		/*!< in: mtr */
+{
+	buf_block_t*	block;
+	trx_rsegf_t*	header;
+
+	block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr);
+	buf_block_dbg_add_level(block, SYNC_RSEG_HEADER_NEW);
+
+	header = TRX_RSEG + buf_block_get_frame(block);
+
+	return(header);
+}
+
+/***************************************************************//**
+Gets the file page number of the nth undo log slot.
+@return	page number of the undo log segment, FIL_NULL if slot is free */
+UNIV_INLINE
+ulint
+trx_rsegf_get_nth_undo(
+/*===================*/
+	trx_rsegf_t*	rsegf,	/*!< in: rollback segment header */
+	ulint		n,	/*!< in: index of slot, < TRX_RSEG_N_SLOTS */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	if (n >= TRX_RSEG_N_SLOTS) {
+		fprintf(stderr,
+			"InnoDB: Error: trying to get slot %lu of rseg\n",
+			(ulong) n);
+		ut_error;
+	}
+
+	return(mtr_read_ulint(rsegf + TRX_RSEG_UNDO_SLOTS
+			      + n * TRX_RSEG_SLOT_SIZE, MLOG_4BYTES, mtr));
+}
+
+/***************************************************************//**
+Sets the file page number of the nth undo log slot. */
+UNIV_INLINE
+void
+trx_rsegf_set_nth_undo(
+/*===================*/
+	trx_rsegf_t*	rsegf,	/*!< in: rollback segment header */
+	ulint		n,	/*!< in: index of slot, < TRX_RSEG_N_SLOTS */
+	ulint		page_no,/*!< in: page number of the undo log segment */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	if (n >= TRX_RSEG_N_SLOTS) {
+		fprintf(stderr,
+			"InnoDB: Error: trying to set slot %lu of rseg\n",
+			(ulong) n);
+		ut_error;
+	}
+
+	mlog_write_ulint(rsegf + TRX_RSEG_UNDO_SLOTS + n * TRX_RSEG_SLOT_SIZE,
+			 page_no, MLOG_4BYTES, mtr);
+}
+
+/****************************************************************//**
+Looks for a free slot (one holding FIL_NULL) for an undo log segment.
+@return	slot index or ULINT_UNDEFINED if not found */
+UNIV_INLINE
+ulint
+trx_rsegf_undo_find_free(
+/*=====================*/
+	trx_rsegf_t*	rsegf,	/*!< in: rollback segment header */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	ulint		i;
+	ulint		page_no;
+
+	for (i = 0;
+#ifndef UNIV_DEBUG
+	     i < TRX_RSEG_N_SLOTS;
+#else
+	     i < (trx_rseg_n_slots_debug ? trx_rseg_n_slots_debug : TRX_RSEG_N_SLOTS);
+#endif
+	     i++) {
+
+		page_no = trx_rsegf_get_nth_undo(rsegf, i, mtr);
+
+		if (page_no == FIL_NULL) {
+
+			return(i);
+		}
+	}
+
+	return(ULINT_UNDEFINED);
+}
+
+/******************************************************************//**
+Looks for a rollback segment, based on the rollback segment id.
+@return	rollback segment stored in trx_sys->rseg_array[id] */
+UNIV_INLINE
+trx_rseg_t*
+trx_rseg_get_on_id(
+/*===============*/
+	ulint	id)	/*!< in: rollback segment id, < TRX_SYS_N_RSEGS */
+{
+	ut_a(id < TRX_SYS_N_RSEGS);
+
+	return(trx_sys->rseg_array[id]);
+}
+
diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h
new file mode 100644
index 00000000000..70f214d1ac7
--- /dev/null
+++ b/storage/innobase/include/trx0sys.h
@@ -0,0 +1,674 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0sys.h
+Transaction system
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef trx0sys_h
+#define trx0sys_h
+
+#include "univ.i"
+
+#include "trx0types.h"
+#include "fsp0types.h"
+#include "fil0fil.h"
+#include "buf0buf.h"
+#ifndef UNIV_HOTBACKUP
+#include "mtr0mtr.h"
+#include "ut0byte.h"
+#include "mem0mem.h"
+#include "sync0sync.h"
+#include "ut0lst.h"
+#include "ut0bh.h"
+#include "read0types.h"
+#include "page0types.h"
+#include "ut0bh.h"
+
+typedef UT_LIST_BASE_NODE_T(trx_t) trx_list_t;
+
+/** In a MySQL replication slave, in crash recovery we store the master log
+file name and position here. */
+/* @{ */
+/** Master binlog file name */
+extern char		trx_sys_mysql_master_log_name[];
+/** Master binlog file position.  We have successfully got the updates
+up to this position.  -1 means that no crash recovery was needed, or
+there was no master log position info inside InnoDB.*/
+extern ib_int64_t	trx_sys_mysql_master_log_pos;
+/* @} */
+
+/** If this MySQL server uses binary logging, after InnoDB has been inited
+and if it has done a crash recovery, we store the binlog file name and position
+here. */
+/* @{ */
+/** Binlog file name */
+extern char		trx_sys_mysql_bin_log_name[];
+/** Binlog file position, or -1 if unknown */
+extern ib_int64_t	trx_sys_mysql_bin_log_pos;
+/* @} */
+
+/** The transaction system */
+extern trx_sys_t*	trx_sys;
+
+/***************************************************************//**
+Checks if a page address is the trx sys header page.
+@return	TRUE if trx sys header page */
+UNIV_INLINE
+ibool
+trx_sys_hdr_page(
+/*=============*/
+	ulint	space,	/*!< in: space */
+	ulint	page_no);/*!< in: page number */
+/*****************************************************************//**
+Creates and initializes the central memory structures for the transaction
+system. This is called when the database is started.
+@return min binary heap of rsegs to purge */
+UNIV_INTERN
+ib_bh_t*
+trx_sys_init_at_db_start(void);
+/*==========================*/
+/*****************************************************************//**
+Creates the trx_sys instance and initializes ib_bh and mutex. */
+UNIV_INTERN
+void
+trx_sys_create(void);
+/*================*/
+/*****************************************************************//**
+Creates and initializes the transaction system at the database creation. */
+UNIV_INTERN
+void
+trx_sys_create_sys_pages(void);
+/*==========================*/
+/****************************************************************//**
+Looks for a free slot for a rollback segment in the trx system file copy.
+@return	slot index or ULINT_UNDEFINED if not found */
+UNIV_INTERN
+ulint
+trx_sysf_rseg_find_free(
+/*====================*/
+	mtr_t*		mtr);		/*!< in: mtr */
+/***************************************************************//**
+Gets the pointer in the nth slot of the rseg array.
+@return	pointer to rseg object, NULL if slot not in use */
+UNIV_INLINE
+trx_rseg_t*
+trx_sys_get_nth_rseg(
+/*=================*/
+	trx_sys_t*	sys,	/*!< in: trx system */
+	ulint		n);	/*!< in: index of slot */
+/**********************************************************************//**
+Gets a pointer to the transaction system file copy and x-locks its page.
+@return	pointer to system file copy, page x-locked */
+UNIV_INLINE
+trx_sysf_t*
+trx_sysf_get(
+/*=========*/
+	mtr_t*	mtr);	/*!< in: mtr */
+/*****************************************************************//**
+Gets the space of the nth rollback segment slot in the trx system
+file copy.
+@return	space id */
+UNIV_INLINE
+ulint
+trx_sysf_rseg_get_space(
+/*====================*/
+	trx_sysf_t*	sys_header,	/*!< in: trx sys file copy */
+	ulint		i,		/*!< in: slot index == rseg id */
+	mtr_t*		mtr);		/*!< in: mtr */
+/*****************************************************************//**
+Gets the page number of the nth rollback segment slot in the trx system
+file copy.
+@return	page number, FIL_NULL if slot unused */
+UNIV_INLINE
+ulint
+trx_sysf_rseg_get_page_no(
+/*======================*/
+	trx_sysf_t*	sys_header,	/*!< in: trx sys file copy */
+	ulint		i,		/*!< in: slot index == rseg id */
+	mtr_t*		mtr);		/*!< in: mtr */
+/*****************************************************************//**
+Sets the space id of the nth rollback segment slot in the trx system
+file copy. */
+UNIV_INLINE
+void
+trx_sysf_rseg_set_space(
+/*====================*/
+	trx_sysf_t*	sys_header,	/*!< in: trx sys file copy */
+	ulint		i,		/*!< in: slot index == rseg id */
+	ulint		space,		/*!< in: space id */
+	mtr_t*		mtr);		/*!< in: mtr */
+/*****************************************************************//**
+Sets the page number of the nth rollback segment slot in the trx system
+file copy. */
+UNIV_INLINE
+void
+trx_sysf_rseg_set_page_no(
+/*======================*/
+	trx_sysf_t*	sys_header,	/*!< in: trx sys file copy */
+	ulint		i,		/*!< in: slot index == rseg id */
+	ulint		page_no,	/*!< in: page number, FIL_NULL if
+					the slot is reset to unused */
+	mtr_t*		mtr);		/*!< in: mtr */
+/*****************************************************************//**
+Allocates a new transaction id.
+@return	new, allocated trx id */
+UNIV_INLINE
+trx_id_t
+trx_sys_get_new_trx_id(void);
+/*========================*/
+/*****************************************************************//**
+Determines the maximum transaction id.
+@return maximum currently allocated trx id; will be stale after the
+next call to trx_sys_get_new_trx_id() */
+UNIV_INLINE
+trx_id_t
+trx_sys_get_max_trx_id(void);
+/*========================*/
+
+#ifdef UNIV_DEBUG
+/* If nonzero, overrides TRX_RSEG_N_SLOTS in trx_rsegf_undo_find_free(). */
+extern uint			trx_rseg_n_slots_debug;
+#endif
+
+/*****************************************************************//**
+Writes a trx id to an index page. In case that the id size changes in
+some future version, this function should be used instead of
+mach_write_... */
+UNIV_INLINE
+void
+trx_write_trx_id(
+/*=============*/
+	byte*		ptr,	/*!< in: pointer to memory where written */
+	trx_id_t	id);	/*!< in: id */
+/*****************************************************************//**
+Reads a trx id from an index page. In case that the id size changes in
+some future version, this function should be used instead of
+mach_read_...
+@return	id */
+UNIV_INLINE
+trx_id_t
+trx_read_trx_id(
+/*============*/
+	const byte*	ptr);	/*!< in: pointer to memory from where to read */
+/****************************************************************//**
+Looks for the trx instance with the given id in the rw trx_list.
+The caller must be holding trx_sys->mutex.
+@return	the trx handle or NULL if not found;
+the pointer must not be dereferenced unless lock_sys->mutex was
+acquired before calling this function and is still being held */
+UNIV_INLINE
+trx_t*
+trx_get_rw_trx_by_id(
+/*=================*/
+	trx_id_t	trx_id);/*!< in: trx id to search for */
+/****************************************************************//**
+Returns the minimum trx id in rw trx list. This is the smallest id for which
+the trx can possibly be active. (But, you must look at the trx->state to
+find out if the minimum trx id transaction itself is active, or already
+committed.)
+@return	the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */
+UNIV_INLINE
+trx_id_t
+trx_rw_min_trx_id(void);
+/*===================*/
+/****************************************************************//**
+Checks if a rw transaction with the given id is active. Caller must hold
+trx_sys->mutex in shared mode. If the caller is not holding
+lock_sys->mutex, the transaction may already have been committed.
+@return	transaction instance if active, or NULL;
+the pointer must not be dereferenced unless lock_sys->mutex was
+acquired before calling this function and is still being held */
+UNIV_INLINE
+trx_t*
+trx_rw_is_active_low(
+/*=================*/
+	trx_id_t	trx_id,		/*!< in: trx id of the transaction */
+	ibool*		corrupt);	/*!< out: NULL or pointer to a flag
+					that will be set if corrupt */
+/****************************************************************//**
+Checks if a rw transaction with the given id is active. If the caller is
+not holding lock_sys->mutex, the transaction may already have been
+committed.
+@return	transaction instance if active, or NULL;
+the pointer must not be dereferenced unless lock_sys->mutex was
+acquired before calling this function and is still being held */
+UNIV_INLINE
+trx_t*
+trx_rw_is_active(
+/*=============*/
+	trx_id_t	trx_id,		/*!< in: trx id of the transaction */
+	ibool*		corrupt);	/*!< out: NULL or pointer to a flag
+					that will be set if corrupt */
+#ifdef UNIV_DEBUG
+/****************************************************************//**
+Checks whether a trx is in one of rw_trx_list or ro_trx_list.
+@return	TRUE if is in */
+UNIV_INTERN
+ibool
+trx_in_trx_list(
+/*============*/
+	const trx_t*	in_trx)		/*!< in: transaction */
+	__attribute__((nonnull, warn_unused_result));
+#endif /* UNIV_DEBUG */
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+/***********************************************************//**
+Assert that a transaction has been recovered.
+@return TRUE */
+UNIV_INLINE
+ibool
+trx_assert_recovered(
+/*=================*/
+	trx_id_t	trx_id)		/*!< in: transaction identifier */
+	__attribute__((warn_unused_result));
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
+/*****************************************************************//**
+Updates the offset information about the end of the MySQL binlog entry
+which corresponds to the transaction just being committed. In a MySQL
+replication slave updates the latest master binlog position up to which
+replication has proceeded. */
+UNIV_INTERN
+void
+trx_sys_update_mysql_binlog_offset(
+/*===============================*/
+	const char*	file_name,/*!< in: MySQL log file name */
+	ib_int64_t	offset,	/*!< in: position in that log file */
+	ulint		field,	/*!< in: offset of the MySQL log info field in
+				the trx sys header */
+	mtr_t*		mtr);	/*!< in: mtr */
+/*****************************************************************//**
+Prints to stderr the MySQL binlog offset info in the trx system header if
+the magic number shows it valid. */
+UNIV_INTERN
+void
+trx_sys_print_mysql_binlog_offset(void);
+/*===================================*/
+/*****************************************************************//**
+Prints to stderr the MySQL master log offset info in the trx system header if
+the magic number shows it valid. */
+UNIV_INTERN
+void
+trx_sys_print_mysql_master_log_pos(void);
+/*====================================*/
+/*****************************************************************//**
+Initializes the tablespace tag system. */
+UNIV_INTERN
+void
+trx_sys_file_format_init(void);
+/*==========================*/
+/*****************************************************************//**
+Closes the tablespace tag system. */
+UNIV_INTERN
+void
+trx_sys_file_format_close(void);
+/*===========================*/
+/********************************************************************//**
+Tags the system table space with minimum format id if it has not been
+tagged yet.
+WARNING: This function is only called during the startup and AFTER the
+redo log application during recovery has finished. */
+UNIV_INTERN
+void
+trx_sys_file_format_tag_init(void);
+/*==============================*/
+/*****************************************************************//**
+Shutdown/Close the transaction system. */
+UNIV_INTERN
+void
+trx_sys_close(void);
+/*===============*/
+/*****************************************************************//**
+Get the name representation of the file format from its id.
+@return	pointer to the name */
+UNIV_INTERN
+const char*
+trx_sys_file_format_id_to_name(
+/*===========================*/
+	const ulint	id);		/*!< in: id of the file format */
+/*****************************************************************//**
+Set the file format id unconditionally except if it's already the
+same value.
+@return	TRUE if value updated */
+UNIV_INTERN
+ibool
+trx_sys_file_format_max_set(
+/*========================*/
+	ulint		format_id,	/*!< in: file format id */
+	const char**	name);		/*!< out: max file format name or
+					NULL if not needed. */
+/*****************************************************************//**
+Creates the rollback segments.
+@return number of rollback segments that are active. */
+UNIV_INTERN
+ulint
+trx_sys_create_rsegs(
+/*=================*/
+	ulint	n_spaces,	/*!< number of tablespaces for UNDO logs */
+	ulint	n_rsegs);	/*!< number of rollback segments to create */
+/*****************************************************************//**
+Gets the number of transactions in the system, independent of their state.
+@return count of transactions in trx_sys_t::trx_list */
+UNIV_INLINE
+ulint
+trx_sys_get_n_rw_trx(void);
+/*======================*/
+
+/*****************************************************************//**
+Checks if there are any active (non-prepared) transactions.
+@return total number of active transactions or 0 if none */
+UNIV_INTERN
+ulint
+trx_sys_any_active_transactions(void);
+/*=================================*/
+#else /* !UNIV_HOTBACKUP */
+/*****************************************************************//**
+Prints to stderr the MySQL binlog info in the system header if the
+magic number shows it valid. */
+UNIV_INTERN
+void
+trx_sys_print_mysql_binlog_offset_from_page(
+/*========================================*/
+	const byte*	page);	/*!< in: buffer containing the trx
+				system header page, i.e., page number
+				TRX_SYS_PAGE_NO in the tablespace */
+/*****************************************************************//**
+Reads the file format id from the first system table space file.
+Even if the call succeeds and returns TRUE, the returned format id
+may be ULINT_UNDEFINED signalling that the format id was not present
+in the data file.
+@return TRUE if call succeeds */
+UNIV_INTERN
+ibool
+trx_sys_read_file_format_id(
+/*========================*/
+	const char *pathname,	/*!< in: pathname of the first system
+				table space file */
+	ulint *format_id);	/*!< out: file format of the system table
+				space */
+/*****************************************************************//**
+Reads the file format id from the given per-table data file.
+@return TRUE if call succeeds */
+UNIV_INTERN
+ibool
+trx_sys_read_pertable_file_format_id(
+/*=================================*/
+	const char *pathname,	/*!< in: pathname of a per-table
+				datafile */
+	ulint *format_id);	/*!< out: file format of the per-table
+				data file */
+#endif /* !UNIV_HOTBACKUP */
+/*****************************************************************//**
+Gets the name representation of the max file format.
+@return	pointer to the max format name */
+UNIV_INTERN
+const char*
+trx_sys_file_format_max_get(void);
+/*=============================*/
+/*****************************************************************//**
+Check for the max file format tag stored on disk.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+trx_sys_file_format_max_check(
+/*==========================*/
+	ulint		max_format_id);	/*!< in: the max format id to check */
+/********************************************************************//**
+Update the file format tag in the system tablespace only if the given
+format id is greater than the known max id.
+@return	TRUE if format_id was bigger than the known max id */
+UNIV_INTERN
+ibool
+trx_sys_file_format_max_upgrade(
+/*============================*/
+	const char**	name,		/*!< out: max file format name */
+	ulint		format_id);	/*!< in: file format identifier */
+/*****************************************************************//**
+Get the name representation of the file format from its id.
+@return	pointer to the name */
+UNIV_INTERN
+const char*
+trx_sys_file_format_id_to_name(
+/*===========================*/
+	const ulint	id);	/*!< in: id of the file format */
+
+#ifdef UNIV_DEBUG
+/*************************************************************//**
+Validate the trx lists of trx_sys_t (NOTE(review): confirm which lists). */
+UNIV_INTERN
+ibool
+trx_sys_validate_trx_list(void);
+/*===========================*/
+#endif /* UNIV_DEBUG */
+
+/* The automatically created system rollback segment has this id */
+#define TRX_SYS_SYSTEM_RSEG_ID	0
+
+/* Space id and page no where the trx system file copy resides */
+#define	TRX_SYS_SPACE	0	/* the SYSTEM tablespace */
+#include "fsp0fsp.h"
+#define	TRX_SYS_PAGE_NO	FSP_TRX_SYS_PAGE_NO
+
+/* The offset of the transaction system header on the page */
+#define	TRX_SYS		FSEG_PAGE_DATA
+
+/** Transaction system header */
+/*------------------------------------------------------------- @{ */
+#define	TRX_SYS_TRX_ID_STORE	0	/*!< the maximum trx id or trx
+					number modulo
+					TRX_SYS_TRX_ID_WRITE_MARGIN
+					written to a file page by any
+					transaction; the assignment of
+					transaction ids continues from
+					this number rounded up by
+					TRX_SYS_TRX_ID_WRITE_MARGIN
+					plus
+					TRX_SYS_TRX_ID_WRITE_MARGIN
+					when the database is
+					started */
+#define TRX_SYS_FSEG_HEADER	8	/*!< segment header for the
+					tablespace segment the trx
+					system is created into */
+#define	TRX_SYS_RSEGS		(8 + FSEG_HEADER_SIZE)
+					/*!< the start of the array of
+					rollback segment specification
+					slots */
+/*------------------------------------------------------------- @} */
+
+/* Max number of rollback segments: the number of segment specification slots
+in the transaction system array; rollback segment id must fit in one (signed)
+byte, therefore 128; each slot is currently 8 bytes in size. If you want
+to raise the level to 256 then you will need to fix some assertions that
+impose the 7 bit restriction. e.g., mach_write_to_3() */
+#define	TRX_SYS_N_RSEGS			128
+/* Originally, InnoDB defined TRX_SYS_N_RSEGS as 256 but created only one
+rollback segment.  It initialized some arrays with this number of entries.
+We must remember this limit in order to keep file compatibility. */
+#define TRX_SYS_OLD_N_RSEGS		256
+
+/** Maximum length of MySQL binlog file name, in bytes.
+@see trx_sys_mysql_master_log_name
+@see trx_sys_mysql_bin_log_name */
+#define TRX_SYS_MYSQL_LOG_NAME_LEN	512
+/** Contents of TRX_SYS_MYSQL_LOG_MAGIC_N_FLD */
+#define TRX_SYS_MYSQL_LOG_MAGIC_N	873422344
+/* Presumably guards the end-of-page offsets (up to 2000 bytes) used below. */
+#if UNIV_PAGE_SIZE_MIN < 4096
+# error "UNIV_PAGE_SIZE_MIN < 4096"
+#endif
+/** The offset of the MySQL replication info in the trx system header;
+this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */
+#define TRX_SYS_MYSQL_MASTER_LOG_INFO	(UNIV_PAGE_SIZE - 2000)
+
+/** The offset of the MySQL binlog offset info in the trx system header */
+#define TRX_SYS_MYSQL_LOG_INFO		(UNIV_PAGE_SIZE - 1000)
+#define	TRX_SYS_MYSQL_LOG_MAGIC_N_FLD	0	/*!< magic number which is
+						TRX_SYS_MYSQL_LOG_MAGIC_N
+						if we have valid data in the
+						MySQL binlog info */
+#define TRX_SYS_MYSQL_LOG_OFFSET_HIGH	4	/*!< high 4 bytes of the offset
+						within that file */
+#define TRX_SYS_MYSQL_LOG_OFFSET_LOW	8	/*!< low 4 bytes of the offset
+						within that file */
+#define TRX_SYS_MYSQL_LOG_NAME		12	/*!< MySQL log file name */
+
+/** Doublewrite buffer */
+/* @{ */
+/** The offset of the doublewrite buffer header on the trx system header page */
+#define TRX_SYS_DOUBLEWRITE		(UNIV_PAGE_SIZE - 200)
+/*-------------------------------------------------------------*/
+#define TRX_SYS_DOUBLEWRITE_FSEG	0	/*!< fseg header of the fseg
+						containing the doublewrite
+						buffer */
+#define TRX_SYS_DOUBLEWRITE_MAGIC	FSEG_HEADER_SIZE
+						/*!< 4-byte magic number which
+						shows if we already have
+						created the doublewrite
+						buffer */
+#define TRX_SYS_DOUBLEWRITE_BLOCK1	(4 + FSEG_HEADER_SIZE)
+						/*!< page number of the
+						first page in the first
+						sequence of 64
+						(= FSP_EXTENT_SIZE) consecutive
+						pages in the doublewrite
+						buffer */
+#define TRX_SYS_DOUBLEWRITE_BLOCK2	(8 + FSEG_HEADER_SIZE)
+						/*!< page number of the
+						first page in the second
+						sequence of 64 consecutive
+						pages in the doublewrite
+						buffer */
+#define TRX_SYS_DOUBLEWRITE_REPEAT	12	/*!< we repeat
+						TRX_SYS_DOUBLEWRITE_MAGIC,
+						TRX_SYS_DOUBLEWRITE_BLOCK1,
+						TRX_SYS_DOUBLEWRITE_BLOCK2
+						so that if the trx sys
+						header is half-written
+						to disk, we still may
+						be able to recover the
+						information */
+/** Offset of the "space id stored" marker (24 = the 12-byte MAGIC/BLOCK1/
+BLOCK2 group plus, presumably, its repeat). If the marker is not yet set to
+TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N, we must reset the doublewrite buffer:
+from 4.1.x on a page's space id is in FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID. */
+#define TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED (24 + FSEG_HEADER_SIZE)
+
+/*-------------------------------------------------------------*/
+/** Contents of TRX_SYS_DOUBLEWRITE_MAGIC */
+#define TRX_SYS_DOUBLEWRITE_MAGIC_N	536853855
+/** Contents of TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED */
+#define TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N 1783657386
+
+/** Size of the doublewrite block in pages */
+#define TRX_SYS_DOUBLEWRITE_BLOCK_SIZE	FSP_EXTENT_SIZE
+/* @} */
+
+/** File format tag */
+/* @{ */
+/** The offset of the file format tag on the trx system header page
+(TRX_SYS_PAGE_NO of TRX_SYS_SPACE) */
+#define TRX_SYS_FILE_FORMAT_TAG		(UNIV_PAGE_SIZE - 16)
+
+/** Contents of TRX_SYS_FILE_FORMAT_TAG when valid (the low 32 bits of the
+64-bit magic below). The file format identifier is added to this constant. */
+#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW	3645922177UL
+/** Contents of TRX_SYS_FILE_FORMAT_TAG+4 when valid (the high 32 bits) */
+#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH	2745987765UL
+/** The 64-bit magic: HIGH in the upper 32 bits, LOW in the lower 32. The
+file format identifier is added to this 64-bit constant. */
+#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N					\
+	((ib_uint64_t) TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH << 32	\
+	 | TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW)
+/* @} */
+
+#ifndef UNIV_HOTBACKUP
+/** The transaction system central memory data structure. */
+struct trx_sys_t{
+
+	ib_mutex_t		mutex;		/*!< mutex protecting most fields in
+					this structure except when noted
+					otherwise */
+	ulint		n_prepared_trx;	/*!< Number of transactions currently
+					in the XA PREPARED state */
+	ulint		n_prepared_recovered_trx; /*!< Number of transactions
+					currently in XA PREPARED state that are
+					also recovered. Such transactions cannot
+					be added during runtime. They can only
+					occur after recovery if mysqld crashed
+					while there were XA PREPARED
+					transactions. We disable query cache
+					if such transactions exist. */
+	trx_id_t	max_trx_id;	/*!< The smallest number not yet
+					assigned as a trx id or trx number;
+					bumped by trx_sys_get_new_trx_id() */
+#ifdef UNIV_DEBUG
+	trx_id_t	rw_max_trx_id;	/*!< Max trx id of read-write transactions
+					which exist or existed; debug only */
+#endif
+	trx_list_t	rw_trx_list;	/*!< List of active and committed in
+					memory read-write transactions, sorted
+					on trx id, biggest first. Recovered
+					transactions are always on this list. */
+	trx_list_t	ro_trx_list;	/*!< List of active and committed in
+					memory read-only transactions, sorted
+					on trx id, biggest first. NOTE:
+					The order for read-only transactions
+					is not necessary. We should exploit
+					this and increase concurrency during
+					add/remove. */
+	trx_list_t	mysql_trx_list;	/*!< List of transactions created
+					for MySQL. All transactions on
+					ro_trx_list are on mysql_trx_list. The
+					rw_trx_list can contain system
+					transactions and recovered transactions
+					that will not be in the mysql_trx_list.
+					There can be active non-locking
+					auto-commit read only transactions that
+					are on this list but not on ro_trx_list.
+					mysql_trx_list may additionally contain
+					transactions that have not yet been
+					started in InnoDB. */
+	trx_rseg_t*	const rseg_array[TRX_SYS_N_RSEGS];
+					/*!< Pointer array to rollback
+					segments, indexed by slot number;
+					NULL if slot not in use; created and
+					destroyed in single-threaded mode; not
+					protected by any mutex, because it is
+					read-only during multi-threaded use */
+	ulint		rseg_history_len;/*!< Length of the TRX_RSEG_HISTORY
+					list (update undo logs for committed
+					transactions), protected by
+					rseg->mutex */
+	UT_LIST_BASE_NODE_T(read_view_t) view_list;
+					/*!< List of read views sorted
+					on trx no, biggest first */
+};
+
+/** When a trx id which is zero modulo this number (which must be a power of
+two) is about to be assigned, the field TRX_SYS_TRX_ID_STORE on the trx
+system page is updated first; see trx_sys_get_new_trx_id() */
+#define TRX_SYS_TRX_ID_WRITE_MARGIN	256
+#endif /* !UNIV_HOTBACKUP */
+
+#ifndef UNIV_NONINL
+#include "trx0sys.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/trx0sys.ic b/storage/innobase/include/trx0sys.ic
new file mode 100644
index 00000000000..e097e29b551
--- /dev/null
+++ b/storage/innobase/include/trx0sys.ic
@@ -0,0 +1,512 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0sys.ic
+Transaction system
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "trx0trx.h"
+#include "data0type.h"
+#ifndef UNIV_HOTBACKUP
+# include "srv0srv.h"
+# include "mtr0log.h"
+
+/* The typedef for rseg slot in the file copy */
+typedef byte	trx_sysf_rseg_t;
+
+/* Rollback segment specification slot offsets (relative to slot start) */
+/*-------------------------------------------------------------*/
+#define	TRX_SYS_RSEG_SPACE	0	/* space where the segment
+					header is placed; starting with
+					MySQL/InnoDB 5.1.7, this is
+					UNIV_UNDEFINED if the slot is unused */
+#define	TRX_SYS_RSEG_PAGE_NO	4	/* page number where the segment
+					header is placed; this is FIL_NULL
+					if the slot is unused */
+/*-------------------------------------------------------------*/
+/* Size of a rollback segment specification slot, in bytes */
+#define TRX_SYS_RSEG_SLOT_SIZE	8
+
+/*****************************************************************//**
+Writes max_trx_id to the file based trx system header (see the caller below). */
+UNIV_INTERN
+void
+trx_sys_flush_max_trx_id(void);
+/*==========================*/
+
+/***************************************************************//**
+Tells whether (space, page_no) addresses the trx sys header page.
+@return	TRUE if trx sys header page */
+UNIV_INLINE
+ibool
+trx_sys_hdr_page(
+/*=============*/
+	ulint	space,	/*!< in: space */
+	ulint	page_no)/*!< in: page number */
+{
+	if (space != TRX_SYS_SPACE || page_no != TRX_SYS_PAGE_NO) {
+
+		return(FALSE);
+	}
+
+	return(TRUE);
+}
+
+/***************************************************************//**
+Fetches the rollback segment pointer kept in slot n of sys->rseg_array.
+@return	pointer to rseg object, NULL if slot not in use */
+UNIV_INLINE
+trx_rseg_t*
+trx_sys_get_nth_rseg(
+/*=================*/
+	trx_sys_t*	sys,	/*!< in: trx system */
+	ulint		n)	/*!< in: index of slot */
+{
+	trx_rseg_t* const*	slot = sys->rseg_array + n;
+	ut_ad(n < TRX_SYS_N_RSEGS);
+	return(*slot);
+}
+
+/**********************************************************************//**
+X-latches page TRX_SYS_PAGE_NO of space TRX_SYS_SPACE within mtr and
+returns the transaction system header located on that page.
+@return	pointer to system header, page x-latched. */
+UNIV_INLINE
+trx_sysf_t*
+trx_sysf_get(
+/*=========*/
+	mtr_t*	mtr)	/*!< in: mtr */
+{
+	buf_block_t*	sys_block;
+
+	ut_ad(mtr);
+
+	sys_block = buf_page_get(
+		TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, mtr);
+
+	buf_block_dbg_add_level(sys_block, SYNC_TRX_SYS_HEADER);
+
+	/* The header starts TRX_SYS bytes into the page frame. */
+	return(buf_block_get_frame(sys_block) + TRX_SYS);
+}
+
+/*****************************************************************//**
+Reads the space id recorded in the nth rollback segment slot of the
+trx system file copy.
+@return	space id */
+UNIV_INLINE
+ulint
+trx_sysf_rseg_get_space(
+/*====================*/
+	trx_sysf_t*	sys_header,	/*!< in: trx sys header */
+	ulint		i,		/*!< in: slot index == rseg id */
+	mtr_t*		mtr)		/*!< in: mtr */
+{
+	trx_sysf_t*	slot;
+	ut_ad(sys_header);
+	ut_ad(i < TRX_SYS_N_RSEGS);
+	slot = sys_header + TRX_SYS_RSEGS + i * TRX_SYS_RSEG_SLOT_SIZE;
+
+	return(mtr_read_ulint(slot + TRX_SYS_RSEG_SPACE, MLOG_4BYTES, mtr));
+}
+
+/*****************************************************************//**
+Reads the page number recorded in the nth rollback segment slot of the
+trx system header.
+@return	page number, FIL_NULL if slot unused */
+UNIV_INLINE
+ulint
+trx_sysf_rseg_get_page_no(
+/*======================*/
+	trx_sysf_t*	sys_header,	/*!< in: trx system header */
+	ulint		i,		/*!< in: slot index == rseg id */
+	mtr_t*		mtr)		/*!< in: mtr */
+{
+	const ulint	offset = TRX_SYS_RSEGS + i * TRX_SYS_RSEG_SLOT_SIZE
+		+ TRX_SYS_RSEG_PAGE_NO;
+	ut_ad(sys_header);
+	ut_ad(i < TRX_SYS_N_RSEGS);
+
+	return(mtr_read_ulint(sys_header + offset, MLOG_4BYTES, mtr));
+}
+
+/*****************************************************************//**
+Stores a space id into the nth rollback segment slot of the trx system
+file copy; the write goes through mlog_write_ulint() with the given mtr. */
+UNIV_INLINE
+void
+trx_sysf_rseg_set_space(
+/*====================*/
+	trx_sysf_t*	sys_header,	/*!< in: trx sys file copy */
+	ulint		i,		/*!< in: slot index == rseg id */
+	ulint		space,		/*!< in: space id */
+	mtr_t*		mtr)		/*!< in: mtr */
+{
+	trx_sysf_t*	slot;
+
+	ut_ad(sys_header);
+	ut_ad(i < TRX_SYS_N_RSEGS);
+
+	slot = sys_header + TRX_SYS_RSEGS + i * TRX_SYS_RSEG_SLOT_SIZE;
+
+	mlog_write_ulint(slot + TRX_SYS_RSEG_SPACE, space, MLOG_4BYTES, mtr);
+}
+
+/*****************************************************************//**
+Stores a page number into the nth rollback segment slot of the trx system
+header; the write goes through mlog_write_ulint() with the given mtr. */
+UNIV_INLINE
+void
+trx_sysf_rseg_set_page_no(
+/*======================*/
+	trx_sysf_t*	sys_header,	/*!< in: trx sys header */
+	ulint		i,		/*!< in: slot index == rseg id */
+	ulint		page_no,	/*!< in: page number, FIL_NULL if the
+					slot is reset to unused */
+	mtr_t*		mtr)		/*!< in: mtr */
+{
+	const ulint	offset = TRX_SYS_RSEGS + i * TRX_SYS_RSEG_SLOT_SIZE
+		+ TRX_SYS_RSEG_PAGE_NO;
+
+	ut_ad(sys_header);
+	ut_ad(i < TRX_SYS_N_RSEGS);
+
+	/* offset is the byte position of the slot's page number field. */
+	mlog_write_ulint(sys_header + offset, page_no, MLOG_4BYTES, mtr);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+#if DATA_TRX_ID_LEN != 6
+# error "DATA_TRX_ID_LEN != 6"
+#endif
+/*****************************************************************//**
+Stores a trx id at ptr using the 6-byte format. Use this helper rather
+than calling mach_write_... directly, so that a future change of the id
+size needs to be handled in one place only. */
+UNIV_INLINE
+void
+trx_write_trx_id(
+/*=============*/
+	byte*		ptr,	/*!< in: pointer to memory where written */
+	trx_id_t	id)	/*!< in: id */
+{
+	mach_write_to_6(ptr, id);
+}
+
+#ifndef UNIV_HOTBACKUP
+/*****************************************************************//**
+Loads a trx id stored at ptr in the 6-byte format. Use this helper rather
+than mach_read_... so that a change of the id size is handled here only.
+@return	id */
+UNIV_INLINE
+trx_id_t
+trx_read_trx_id(
+/*============*/
+	const byte*	ptr)	/*!< in: pointer to memory from where to read */
+{
+#if DATA_TRX_ID_LEN != 6
+# error "DATA_TRX_ID_LEN != 6"
+#endif
+	const trx_id_t	id = mach_read_from_6(ptr);
+	return(id);
+}
+
+/****************************************************************//**
+Looks for the trx handle with the given id in rw_trx_list, probing both
+ends of the list before scanning. The caller must hold trx_sys->mutex.
+@return	the trx handle or NULL if not found;
+the pointer must not be dereferenced unless lock_sys->mutex was
+acquired before calling this function and is still being held */
+UNIV_INLINE
+trx_t*
+trx_get_rw_trx_by_id(
+/*=================*/
+	trx_id_t	trx_id)	/*!< in: trx id to search for */
+{
+	trx_t*		trx;
+	ulint		len;
+	trx_t*		first;
+
+	ut_ad(mutex_own(&trx_sys->mutex));
+
+	len = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
+
+	if (len == 0) {
+		return(NULL);
+	}
+
+	/* The list is ordered on trx id in descending order, so the head
+	(biggest id) and the tail (smallest id) are checked first; an id
+	outside that range cannot be on the list. */
+	trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
+	assert_trx_in_rw_list(trx);
+
+	if (trx_id == trx->id) {
+		return(trx);
+	} else if (len == 1 || trx_id > trx->id) {
+		return(NULL);
+	}
+	/* Keep the head: the scan below may restart from it. */
+	first = trx;
+
+	trx = UT_LIST_GET_LAST(trx_sys->rw_trx_list);
+	assert_trx_in_rw_list(trx);
+
+	if (trx_id == trx->id) {
+		return(trx);
+	} else if (len == 2 || trx_id < trx->id) {
+		return(NULL);
+	}
+
+	/* Below the midpoint of the end ids: walk up from the tail; else down. */
+	if (trx_id < (first->id + trx->id) >> 1) {
+		for (trx = UT_LIST_GET_PREV(trx_list, trx);
+		     trx != NULL && trx_id > trx->id;
+		     trx = UT_LIST_GET_PREV(trx_list, trx)) {
+			assert_trx_in_rw_list(trx);
+		}
+	} else {
+		for (trx = UT_LIST_GET_NEXT(trx_list, first);
+		     trx != NULL && trx_id < trx->id;
+		     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
+			assert_trx_in_rw_list(trx);
+		}
+	}
+	/* trx is NULL or the element where the scan stopped; match ids. */
+	return((trx != NULL && trx->id == trx_id) ? trx : NULL);
+}
+
+/****************************************************************//**
+Returns the minimum trx id in rw_trx_list. This is the smallest id for
+which the trx can possibly be active. (The transaction with that id may
+itself be active or already committed; look at trx->state to find out.)
+The caller must be holding trx_sys->mutex.
+@return	the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */
+UNIV_INLINE
+trx_id_t
+trx_rw_min_trx_id_low(void)
+/*=======================*/
+{
+	const trx_t*	tail;
+
+	ut_ad(mutex_own(&trx_sys->mutex));
+
+	/* The last element of rw_trx_list supplies the minimum id;
+	there is no last element when the list is empty. */
+	tail = UT_LIST_GET_LAST(trx_sys->rw_trx_list);
+
+	if (tail != NULL) {
+		assert_trx_in_rw_list(tail);
+		return(tail->id);
+	}
+
+	/* Empty list: fall back to the next id to be assigned. */
+	return(trx_sys->max_trx_id);
+}
+
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+/***********************************************************//**
+Assert that the transaction with this id is in rw_trx_list and recovered.
+@return TRUE */
+UNIV_INLINE
+ibool
+trx_assert_recovered(
+/*=================*/
+	trx_id_t	trx_id)		/*!< in: transaction identifier */
+{
+	const trx_t*	trx;
+
+	mutex_enter(&trx_sys->mutex);
+	trx = trx_get_rw_trx_by_id(trx_id);
+	/* The lookup returns NULL for an unknown id: assert before deref. */
+	ut_a(trx != NULL);
+	ut_a(trx->is_recovered);
+
+	mutex_exit(&trx_sys->mutex);
+	return(TRUE);
+}
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
+
+/****************************************************************//**
+Returns the minimum trx id in rw trx list, acquiring trx_sys->mutex for
+the lookup. This is the smallest id for which the rw trx can possibly be
+active. (The transaction with that id may itself be active or already
+committed; look at trx->state to find out.)
+@return	the minimum trx id, or trx_sys->max_trx_id if rw trx list is empty */
+UNIV_INLINE
+trx_id_t
+trx_rw_min_trx_id(void)
+/*===================*/
+{
+	trx_id_t	min_id;
+
+	/* The _low variant requires the mutex to be held by the caller. */
+	mutex_enter(&trx_sys->mutex);
+	min_id = trx_rw_min_trx_id_low();
+	mutex_exit(&trx_sys->mutex);
+
+	/* The value may be stale once the mutex has been released. */
+	return(min_id);
+}
+
+/****************************************************************//**
+Tells whether the read-write transaction with the given id is active.
+The caller must hold trx_sys->mutex. Unless the caller also holds
+lock_sys->mutex, the transaction may already have been committed.
+@return	transaction instance if active, or NULL;
+the pointer must not be dereferenced unless lock_sys->mutex was
+acquired before calling this function and is still being held */
+UNIV_INLINE
+trx_t*
+trx_rw_is_active_low(
+/*=================*/
+	trx_id_t	trx_id,		/*!< in: trx id of the transaction */
+	ibool*		corrupt)	/*!< out: NULL or pointer to a flag
+					that will be set if corrupt */
+{
+	trx_t*		trx;
+
+	ut_ad(mutex_own(&trx_sys->mutex));
+
+	if (trx_id < trx_rw_min_trx_id_low()) {
+		/* Below the smallest id for which a rw trx can
+		possibly be active. */
+		return(NULL);
+	}
+
+	if (trx_id >= trx_sys->max_trx_id) {
+		/* There must be corruption: we let the caller handle the
+		diagnostic prints in this case. */
+		if (corrupt != NULL) {
+			*corrupt = TRUE;
+		}
+		return(NULL);
+	}
+
+	trx = trx_get_rw_trx_by_id(trx_id);
+	if (trx == NULL
+	    || trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)) {
+		/* Not on the list, or on it but already committed. */
+		return(NULL);
+	}
+
+	return(trx);
+}
+
+/****************************************************************//**
+Tells whether the read-write transaction with the given id is active,
+acquiring trx_sys->mutex for the check. Unless the caller holds
+lock_sys->mutex, the transaction may already have been committed.
+@return	transaction instance if active, or NULL;
+the pointer must not be dereferenced unless lock_sys->mutex was
+acquired before calling this function and is still being held */
+UNIV_INLINE
+trx_t*
+trx_rw_is_active(
+/*=============*/
+	trx_id_t	trx_id,		/*!< in: trx id of the transaction */
+	ibool*		corrupt)	/*!< out: NULL or pointer to a flag
+					that will be set if corrupt */
+{
+	trx_t*		active_trx;
+
+	/* The _low variant requires the mutex to be held by the caller. */
+	mutex_enter(&trx_sys->mutex);
+	active_trx = trx_rw_is_active_low(trx_id, corrupt);
+	mutex_exit(&trx_sys->mutex);
+
+	/* See the dereferencing restriction in the function comment. */
+	return(active_trx);
+}
+
+/*****************************************************************//**
+Hands out the next transaction id. The caller must hold trx_sys->mutex.
+@return	new, allocated trx id */
+UNIV_INLINE
+trx_id_t
+trx_sys_get_new_trx_id(void)
+/*========================*/
+{
+	trx_id_t	new_id;
+
+	ut_ad(mutex_own(&trx_sys->mutex));
+
+	/* VERY important: after the database is started, max_trx_id is
+	divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, so the first call here
+	writes the trx id value to the disk-based header! Thus trx id
+	values will not overlap when the database is repeatedly started! */
+	if (0 == trx_sys->max_trx_id % (trx_id_t) TRX_SYS_TRX_ID_WRITE_MARGIN) {
+		trx_sys_flush_max_trx_id();
+	}
+
+	new_id = trx_sys->max_trx_id;
+	trx_sys->max_trx_id = new_id + 1;
+	return(new_id);
+}
+
+/*****************************************************************//**
+Reads the current value of trx_sys->max_trx_id.
+@return maximum currently allocated trx id; will be stale after the
+next call to trx_sys_get_new_trx_id() */
+UNIV_INLINE
+trx_id_t
+trx_sys_get_max_trx_id(void)
+/*========================*/
+{
+#if UNIV_WORD_SIZE < DATA_TRX_ID_LEN
+	trx_id_t	snapshot;
+
+	ut_ad(!mutex_own(&trx_sys->mutex));
+
+	/* Avoid torn reads. */
+	mutex_enter(&trx_sys->mutex);
+	snapshot = trx_sys->max_trx_id;
+	mutex_exit(&trx_sys->mutex);
+	return(snapshot);
+#else
+	ut_ad(!mutex_own(&trx_sys->mutex));
+
+	/* Perform a dirty read. Callers should be prepared for stale
+	values, and we know that the value fits in a machine word, so
+	that it will be read and written atomically. */
+	return(trx_sys->max_trx_id);
+#endif
+}
+
+/*****************************************************************//**
+Get the number of transactions on rw_trx_list, independent of their state.
+@return count of transactions in trx_sys_t::rw_trx_list */
+UNIV_INLINE
+ulint
+trx_sys_get_n_rw_trx(void)
+/*======================*/
+{
+	ulint	count;
+
+	/* Read the list length while holding trx_sys->mutex; the value
+	may be stale as soon as the mutex is released. */
+	mutex_enter(&trx_sys->mutex);
+	count = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
+	mutex_exit(&trx_sys->mutex);
+
+	return(count);
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
new file mode 100644
index 00000000000..144e1803975
--- /dev/null
+++ b/storage/innobase/include/trx0trx.h
@@ -0,0 +1,1116 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0trx.h
+The transaction
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef trx0trx_h
+#define trx0trx_h
+
+#include "univ.i"
+#include "trx0types.h"
+#include "dict0types.h"
+#ifndef UNIV_HOTBACKUP
+#include "lock0types.h"
+#include "log0log.h"
+#include "usr0types.h"
+#include "que0types.h"
+#include "mem0mem.h"
+#include "read0types.h"
+#include "trx0xa.h"
+#include "ut0vec.h"
+#include "fts0fts.h"
+
+/** Dummy session used currently in MySQL interface */
+extern sess_t*	trx_dummy_sess;
+
+/********************************************************************//**
+Releases the search latch if trx has reserved it. */
+UNIV_INLINE
+void
+trx_search_latch_release_if_reserved(
+/*=================================*/
+	trx_t*		trx); /*!< in: transaction */
+/******************************************************************//**
+Set detailed error message for the transaction. */
+UNIV_INTERN
+void
+trx_set_detailed_error(
+/*===================*/
+	trx_t*		trx,	/*!< in: transaction struct */
+	const char*	msg);	/*!< in: detailed error message */
+/*************************************************************//**
+Set detailed error message for the transaction from a file. Note that the
+file is rewound before reading from it. */
+UNIV_INTERN
+void
+trx_set_detailed_error_from_file(
+/*=============================*/
+	trx_t*	trx,	/*!< in: transaction struct */
+	FILE*	file);	/*!< in: file to read message from */
+/****************************************************************//**
+Retrieves the error_info field from a trx.
+@return	the error info (a dict_index_t pointer) */
+UNIV_INLINE
+const dict_index_t*
+trx_get_error_info(
+/*===============*/
+	const trx_t*	trx);	/*!< in: trx object */
+/********************************************************************//**
+Creates a transaction object for MySQL.
+@return	own: transaction object */
+UNIV_INTERN
+trx_t*
+trx_allocate_for_mysql(void);
+/*========================*/
+/********************************************************************//**
+Creates a transaction object for background operations by the master thread.
+@return	own: transaction object */
+UNIV_INTERN
+trx_t*
+trx_allocate_for_background(void);
+/*=============================*/
+/********************************************************************//**
+Frees a transaction object used for a master thread background operation. */
+UNIV_INTERN
+void
+trx_free_for_background(
+/*====================*/
+	trx_t*	trx);	/*!< in, own: trx object */
+/********************************************************************//**
+At shutdown, frees a transaction object that is in the PREPARED state. */
+UNIV_INTERN
+void
+trx_free_prepared(
+/*==============*/
+	trx_t*	trx)	/*!< in, own: trx object; must not be NULL */
+	UNIV_COLD __attribute__((nonnull));
+/********************************************************************//**
+Frees a transaction object for MySQL. */
+UNIV_INTERN
+void
+trx_free_for_mysql(
+/*===============*/
+	trx_t*	trx);	/*!< in, own: trx object */
+/****************************************************************//**
+Creates trx objects for transactions and initializes the trx list of
+trx_sys at database start. Rollback segment and undo log lists must
+already exist when this function is called, because the lists of
+transactions to be rolled back or cleaned up are built from the
+undo log lists. */
+UNIV_INTERN
+void
+trx_lists_init_at_db_start(void);
+/*============================*/
+
+#ifdef UNIV_DEBUG
+#define trx_start_if_not_started_xa(t)				\
+	{							\
+	(t)->start_line = __LINE__;				\
+	(t)->start_file = __FILE__;				\
+	trx_start_if_not_started_xa_low((t));			\
+	}
+#else
+#define trx_start_if_not_started_xa(t)				\
+	trx_start_if_not_started_xa_low((t))
+#endif /* UNIV_DEBUG */
+
+/*************************************************************//**
+Starts the transaction if it is not yet started (XA variant, _xa macro). */
+UNIV_INTERN
+void
+trx_start_if_not_started_xa_low(
+/*============================*/
+	trx_t*	trx);	/*!< in: transaction */
+/*************************************************************//**
+Starts the transaction if it is not yet started (see the macro below). */
+UNIV_INTERN
+void
+trx_start_if_not_started_low(
+/*=========================*/
+	trx_t*	trx);	/*!< in: transaction */
+
+#ifdef UNIV_DEBUG
+#define trx_start_if_not_started(t)				\
+	{							\
+	(t)->start_line = __LINE__;				\
+	(t)->start_file = __FILE__;				\
+	trx_start_if_not_started_low((t));			\
+	}
+#else
+#define trx_start_if_not_started(t)				\
+	trx_start_if_not_started_low((t))
+#endif /* UNIV_DEBUG */
+
+/*************************************************************//**
+Starts the transaction for a DDL operation (see trx_start_for_ddl()). */
+UNIV_INTERN
+void
+trx_start_for_ddl_low(
+/*==================*/
+	trx_t*		trx,	/*!< in/out: transaction */
+	trx_dict_op_t	op)	/*!< in: dictionary operation type */
+	__attribute__((nonnull));
+
+#ifdef UNIV_DEBUG
+#define trx_start_for_ddl(t, o)					\
+	{							\
+	ut_ad((t)->start_file == 0);				\
+	(t)->start_line = __LINE__;				\
+	(t)->start_file = __FILE__;				\
+	trx_start_for_ddl_low((t), (o));			\
+	}
+#else
+#define trx_start_for_ddl(t, o)					\
+	trx_start_for_ddl_low((t), (o))
+#endif /* UNIV_DEBUG */
+
+/****************************************************************//**
+Commits a transaction. trx is declared nonnull. */
+UNIV_INTERN
+void
+trx_commit(
+/*=======*/
+	trx_t*	trx)	/*!< in/out: transaction */
+	__attribute__((nonnull));
+/****************************************************************//**
+Commits a transaction and a mini-transaction. Only trx is declared nonnull. */
+UNIV_INTERN
+void
+trx_commit_low(
+/*===========*/
+	trx_t*	trx,	/*!< in/out: transaction */
+	mtr_t*	mtr)	/*!< in/out: mini-transaction (will be committed),
+			or NULL if trx made no modifications */
+	__attribute__((nonnull(1)));
+/****************************************************************//**
+Cleans up a transaction at database startup. The cleanup is needed if
+the transaction had already got to the middle of a commit when the
+database crashed, so that we cannot roll it back. */
+UNIV_INTERN
+void
+trx_cleanup_at_db_startup(
+/*======================*/
+	trx_t*	trx);	/*!< in: transaction */
+/**********************************************************************//**
+Does the transaction commit for MySQL.
+@return	DB_SUCCESS or error number */
+UNIV_INTERN
+dberr_t
+trx_commit_for_mysql(
+/*=================*/
+	trx_t*	trx);	/*!< in/out: transaction */
+/**********************************************************************//**
+Does the transaction prepare for MySQL. */
+UNIV_INTERN
+void
+trx_prepare_for_mysql(
+/*==================*/
+	trx_t*	trx);	/*!< in/out: trx handle */
+/**********************************************************************//**
+This function is used to find number of prepared transactions and
+their transaction objects for a recovery.
+@return	number of prepared transactions */
+UNIV_INTERN
+int
+trx_recover_for_mysql(
+/*==================*/
+	XID*	xid_list,	/*!< in/out: prepared transactions */
+	ulint	len);		/*!< in: number of slots in xid_list */
+/*******************************************************************//**
+This function is used to find one X/Open XA distributed transaction
+which is in the prepared state.
+@return	trx or NULL; on match, the trx->xid will be invalidated.
+Note that the trx may have been committed, unless the caller is
+holding lock_sys->mutex */
+UNIV_INTERN
+trx_t *
+trx_get_trx_by_xid(
+/*===============*/
+	const XID*	xid);	/*!< in: X/Open XA transaction identifier */
+/**********************************************************************//**
+If required, flushes the log to disk if we called trx_commit_for_mysql()
+with trx->flush_log_later == TRUE. */
+UNIV_INTERN
+void
+trx_commit_complete_for_mysql(
+/*==========================*/
+	trx_t*	trx)	/*!< in/out: transaction */
+	__attribute__((nonnull));
+/**********************************************************************//**
+Marks the latest SQL statement ended. */
+UNIV_INTERN
+void
+trx_mark_sql_stat_end(
+/*==================*/
+	trx_t*	trx);	/*!< in: trx handle */
+/********************************************************************//**
+Assigns a read view for a consistent read query. All the consistent reads
+within the same transaction will get the same read view, which is created
+when this function is first called for a new started transaction.
+@return	consistent read view */
+UNIV_INTERN
+read_view_t*
+trx_assign_read_view(
+/*=================*/
+	trx_t*	trx);	/*!< in: active transaction */
+/****************************************************************//**
+Prepares a transaction for commit/rollback. */
+UNIV_INTERN
+void
+trx_commit_or_rollback_prepare(
+/*===========================*/
+	trx_t*	trx);	/*!< in/out: transaction */
+/*********************************************************************//**
+Creates a commit command node struct.
+@return	own: commit node struct */
+UNIV_INTERN
+commit_node_t*
+trx_commit_node_create(
+/*===================*/
+	mem_heap_t*	heap);	/*!< in: mem heap where created */
+/***********************************************************//**
+Performs an execution step for a commit type node in a query graph.
+@return	query thread to run next, or NULL */
+UNIV_INTERN
+que_thr_t*
+trx_commit_step(
+/*============*/
+	que_thr_t*	thr);	/*!< in: query thread */
+
+/**********************************************************************//**
+Prints info about a transaction.
+Caller must hold trx_sys->mutex. */
+UNIV_INTERN
+void
+trx_print_low(
+/*==========*/
+	FILE*		f,
+			/*!< in: output stream */
+	const trx_t*	trx,
+			/*!< in: transaction */
+	ulint		max_query_len,
+			/*!< in: max query length to print,
+			or 0 to use the default max length */
+	ulint		n_rec_locks,
+			/*!< in: lock_number_of_rows_locked(&trx->lock) */
+	ulint		n_trx_locks,
+			/*!< in: length of trx->lock.trx_locks */
+	ulint		heap_size)
+			/*!< in: mem_heap_get_size(trx->lock.lock_heap) */
+	__attribute__((nonnull));
+
+/**********************************************************************//**
+Prints info about a transaction.
+The caller must hold lock_sys->mutex and trx_sys->mutex.
+When possible, use trx_print() instead. */
+UNIV_INTERN
+void
+trx_print_latched(
+/*==============*/
+	FILE*		f,		/*!< in: output stream */
+	const trx_t*	trx,		/*!< in: transaction */
+	ulint		max_query_len)	/*!< in: max query length to print,
+					or 0 to use the default max length */
+	__attribute__((nonnull));
+
+/**********************************************************************//**
+Prints info about a transaction.
+Acquires and releases lock_sys->mutex and trx_sys->mutex. */
+UNIV_INTERN
+void
+trx_print(
+/*======*/
+	FILE*		f,		/*!< in: output stream */
+	const trx_t*	trx,		/*!< in: transaction */
+	ulint		max_query_len)	/*!< in: max query length to print,
+					or 0 to use the default max length */
+	__attribute__((nonnull));
+
+/**********************************************************************//**
+Determine if a transaction is a dictionary operation.
+@return	dictionary operation mode */
+UNIV_INLINE
+enum trx_dict_op_t
+trx_get_dict_operation(
+/*===================*/
+	const trx_t*	trx)	/*!< in: transaction */
+	__attribute__((pure));
+/**********************************************************************//**
+Flag a transaction a dictionary operation. */
+UNIV_INLINE
+void
+trx_set_dict_operation(
+/*===================*/
+	trx_t*			trx,	/*!< in/out: transaction */
+	enum trx_dict_op_t	op);	/*!< in: operation, not
+					TRX_DICT_OP_NONE */
+
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Determines if a transaction is in the given state.
+The caller must hold trx_sys->mutex, or it must be the thread
+that is serving a running transaction.
+A running transaction must be in trx_sys->ro_trx_list or trx_sys->rw_trx_list
+unless it is a non-locking autocommit read only transaction, which is only
+in trx_sys->mysql_trx_list.
+@return	TRUE if trx->state == state */
+UNIV_INLINE
+ibool
+trx_state_eq(
+/*=========*/
+	const trx_t*	trx,	/*!< in: transaction */
+	trx_state_t	state)	/*!< in: state;
+				if state != TRX_STATE_NOT_STARTED
+				asserts that
+				trx->state != TRX_STATE_NOT_STARTED */
+	__attribute__((nonnull, warn_unused_result));
+# ifdef UNIV_DEBUG
+/**********************************************************************//**
+Asserts that a transaction has been started.
+The caller must hold trx_sys->mutex.
+@return TRUE if started */
+UNIV_INTERN
+ibool
+trx_assert_started(
+/*===============*/
+	const trx_t*	trx)	/*!< in: transaction */
+	__attribute__((nonnull, warn_unused_result));
+# endif /* UNIV_DEBUG */
+
+/**********************************************************************//**
+Determines if the currently running transaction has been interrupted.
+@return	TRUE if interrupted */
+UNIV_INTERN
+ibool
+trx_is_interrupted(
+/*===============*/
+	const trx_t*	trx);	/*!< in: transaction */
+/**********************************************************************//**
+Determines if the currently running transaction is in strict mode.
+@return	TRUE if strict */
+UNIV_INTERN
+ibool
+trx_is_strict(
+/*==========*/
+	trx_t*	trx);	/*!< in: transaction */
+#else /* !UNIV_HOTBACKUP */
+/* UNIV_HOTBACKUP build: the argument is ignored and the macro is always
+FALSE. */
+#define trx_is_interrupted(trx) FALSE
+#endif /* !UNIV_HOTBACKUP */
+
+/*******************************************************************//**
+Calculates the "weight" of a transaction. The weight of one transaction
+is estimated as the number of altered rows + the number of locked rows.
+The expansion adds t->undo_no and UT_LIST_GET_LEN() of t->lock.trx_locks.
+NOTE: t appears twice in the expansion, so it is evaluated twice; do not
+pass an expression with side effects.
+@param t	transaction
+@return		transaction weight */
+#define TRX_WEIGHT(t)	((t)->undo_no + UT_LIST_GET_LEN((t)->lock.trx_locks))
+
+/*******************************************************************//**
+Compares the "weight" (or size) of two transactions. Transactions that
+have edited non-transactional tables are considered heavier than ones
+that have not.
+@return	TRUE if weight(a) >= weight(b) */
+UNIV_INTERN
+ibool
+trx_weight_ge(
+/*==========*/
+	const trx_t*	a,	/*!< in: the first transaction to be compared */
+	const trx_t*	b);	/*!< in: the second transaction to be compared */
+
+/* Maximum length of a string that can be returned by
+trx_get_que_state_str(), not counting the terminating NUL character. */
+#define TRX_QUE_STATE_STR_MAX_LEN	12 /* "ROLLING BACK" */
+
+/*******************************************************************//**
+Retrieves transaction's que state in a human readable string. The string
+should not be free()'d or modified.
+@return	string in the data segment */
+UNIV_INLINE
+const char*
+trx_get_que_state_str(
+/*==================*/
+	const trx_t*	trx);	/*!< in: transaction */
+
+/****************************************************************//**
+Assign a read-only transaction a rollback-segment, if it is attempting
+to write to a TEMPORARY table. */
+UNIV_INTERN
+void
+trx_assign_rseg(
+/*============*/
+	trx_t*		trx);		/*!< A read-only transaction that
+					needs to be assigned a RBS. */
+/*******************************************************************//**
+Returns the lock wait timeout that applies to a transaction.
+A transaction that was not started by the MySQL server has no
+trx_t::mysql_thd; for such a transaction the timeout is 0. Otherwise the
+value is obtained from thd_lock_wait_timeout() on trx_t::mysql_thd, i.e.
+the user configured innodb_lock_wait_timeout.
+NOTE: trx is evaluated twice when mysql_thd is set.
+@param trx	transaction
+@return		lock wait timeout in seconds */
+#define trx_lock_wait_timeout_get(trx)					\
+	((trx)->mysql_thd == NULL					\
+	 ? 0								\
+	 : thd_lock_wait_timeout((trx)->mysql_thd))
+
+/*******************************************************************//**
+Tells whether the transaction is a non-locking autocommit select
+(implied read-only): auto_commit is set and will_lock is zero.
+@param t	transaction
+@return	true if non-locking autocommit select transaction. */
+#define trx_is_autocommit_non_locking(t)				\
+	((t)->auto_commit && !(t)->will_lock)
+
+/*******************************************************************//**
+Tells whether the transaction is a non-locking autocommit select that is
+also explicitly flagged read-only (read_only is checked first).
+@param t	transaction
+@return	true if non-locking autocommit read-only transaction. */
+#define trx_is_ac_nl_ro(t)						\
+	((t)->read_only && trx_is_autocommit_non_locking(t))
+
+/*******************************************************************//**
+Assert that the transaction is in the trx_sys_t::rw_trx_list.
+First asserts that the transaction is not flagged read_only, then applies
+every check of assert_trx_in_list().
+@param t	transaction */
+#define assert_trx_in_rw_list(t) do {					\
+	ut_ad(!(t)->read_only);						\
+	assert_trx_in_list(t);						\
+} while (0)
+
+/*******************************************************************//**
+Assert that the transaction is either in trx_sys->ro_trx_list or
+trx_sys->rw_trx_list but not both and it cannot be an autocommit
+non-locking select.
+
+The transaction state must be TRX_STATE_PREPARED, TRX_STATE_ACTIVE or
+TRX_STATE_COMMITTED_IN_MEMORY. For those states the "continue" statement
+jumps to the "while (0)" test of the enclosing do-loop, which ends the
+macro without reaching ut_error. For TRX_STATE_NOT_STARTED, or for any
+value that is not listed in the switch, control leaves the switch and
+reaches ut_error.
+@param t	transaction; evaluated several times */
+#define assert_trx_in_list(t) do {					\
+	ut_ad((t)->in_ro_trx_list == (t)->read_only);			\
+	ut_ad((t)->in_rw_trx_list == !(t)->read_only);			\
+	ut_ad(!trx_is_autocommit_non_locking((t)));			\
+	switch ((t)->state) {						\
+	case TRX_STATE_PREPARED:					\
+		/* fall through */					\
+	case TRX_STATE_ACTIVE:						\
+	case TRX_STATE_COMMITTED_IN_MEMORY:				\
+		continue;						\
+	case TRX_STATE_NOT_STARTED:					\
+		break;							\
+	}								\
+	ut_error;							\
+} while (0)
+
+#ifdef UNIV_DEBUG
+/*******************************************************************//**
+Assert that an autocommit non-locking select cannot be in the
+ro_trx_list nor the rw_trx_list and that it is a read-only transaction.
+The transaction must be in the mysql_trx_list, must not be a recovered
+transaction, and its state must be TRX_STATE_NOT_STARTED or
+TRX_STATE_ACTIVE.
+Any other kind of transaction is checked with assert_trx_in_list().
+@param t	transaction; evaluated several times */
+# define assert_trx_nonlocking_or_in_list(t)				\
+	do {								\
+		if (trx_is_autocommit_non_locking(t)) {			\
+			trx_state_t	t_state = (t)->state;		\
+			ut_ad((t)->read_only);				\
+			ut_ad(!(t)->is_recovered);			\
+			ut_ad(!(t)->in_ro_trx_list);			\
+			ut_ad(!(t)->in_rw_trx_list);			\
+			ut_ad((t)->in_mysql_trx_list);			\
+			ut_ad(t_state == TRX_STATE_NOT_STARTED		\
+			      || t_state == TRX_STATE_ACTIVE);		\
+		} else {						\
+			assert_trx_in_list(t);				\
+		}							\
+	} while (0)
+#else /* UNIV_DEBUG */
+/*******************************************************************//**
+Assert that an autocommit non-locking select cannot be in the
+ro_trx_list nor the rw_trx_list and that it is a read-only transaction.
+The transaction must be in the mysql_trx_list.
+In a non-debug build the macro expands to a no-op and does not evaluate
+its argument. */
+# define assert_trx_nonlocking_or_in_list(trx) ((void)0)
+#endif /* UNIV_DEBUG */
+
+/*******************************************************************//**
+Latching protocol for trx_lock_t::que_state.  trx_lock_t::que_state
+captures the state of the query thread during the execution of a query.
+This is different from a transaction state. The query state of a transaction
+can be updated asynchronously by other threads.  The other threads can be
+system threads, like the timeout monitor thread or user threads executing
+other queries. Another thing to be mindful of is that there is a delay between
+when a query thread is put into LOCK_WAIT state and before it actually starts
+waiting.  Between these two events it is possible that the query thread is
+granted the lock it was waiting for, which implies that the state can be changed
+asynchronously.
+
+All these operations take place within the context of locking. Therefore state
+changes within the locking code must acquire both the lock mutex and the
+trx->mutex when changing trx->lock.que_state to TRX_QUE_LOCK_WAIT or
+trx->lock.wait_lock to non-NULL but when the lock wait ends it is sufficient
+to only acquire the trx->mutex.
+To query the state either of the mutexes is sufficient within the locking
+code and no mutex is required when the query thread is no longer waiting. */
+
+/** The locks and state of an active transaction. Protected by
+lock_sys->mutex, trx->mutex or both. The protecting mutex differs per
+field; see the individual field comments. */
+struct trx_lock_t {
+	ulint		n_active_thrs;	/*!< number of active query threads */
+
+	trx_que_t	que_state;	/*!< valid when trx->state
+					== TRX_STATE_ACTIVE: TRX_QUE_RUNNING,
+					TRX_QUE_LOCK_WAIT, ... */
+
+	lock_t*		wait_lock;	/*!< if trx execution state is
+					TRX_QUE_LOCK_WAIT, this points to
+					the lock request, otherwise this is
+					NULL; set to non-NULL when holding
+					both trx->mutex and lock_sys->mutex;
+					set to NULL when holding
+					lock_sys->mutex; readers should
+					hold lock_sys->mutex, except when
+					they are holding trx->mutex and
+					wait_lock==NULL.
+					NOTE(review): the latching protocol
+					comment for trx_lock_t::que_state
+					says that trx->mutex alone is
+					sufficient when the lock wait ends;
+					confirm which mutex is actually held
+					when this is reset to NULL */
+	ib_uint64_t	deadlock_mark;	/*!< A mark field that is initialized
+					to and checked against lock_mark_counter
+					by lock_deadlock_recursive(). */
+	ibool		was_chosen_as_deadlock_victim;
+					/*!< when the transaction decides to
+					wait for a lock, it sets this to FALSE;
+					if another transaction chooses this
+					transaction as a victim in deadlock
+					resolution, it sets this to TRUE.
+					Protected by trx->mutex. */
+	time_t		wait_started;	/*!< lock wait started at this time,
+					protected only by lock_sys->mutex */
+
+	que_thr_t*	wait_thr;	/*!< query thread belonging to this
+					trx that is in QUE_THR_LOCK_WAIT
+					state. For threads suspended in a
+					lock wait, this is protected by
+					lock_sys->mutex. Otherwise, this may
+					only be modified by the thread that is
+					serving the running transaction. */
+
+	mem_heap_t*	lock_heap;	/*!< memory heap for trx_locks;
+					protected by lock_sys->mutex */
+
+	UT_LIST_BASE_NODE_T(lock_t)
+			trx_locks;	/*!< locks requested
+					by the transaction;
+					insertions are protected by trx->mutex
+					and lock_sys->mutex; removals are
+					protected by lock_sys->mutex */
+
+	ib_vector_t*	table_locks;	/*!< All table locks requested by this
+					transaction, including AUTOINC locks.
+					NOTE(review): the protecting mutex is
+					not stated here; presumably the same
+					rules as for trx_locks apply --
+					confirm */
+
+	ibool		cancel;		/*!< TRUE if the transaction is being
+					rolled back either via deadlock
+					detection or due to lock timeout. The
+					caller has to acquire the trx_t::mutex
+					in order to cancel the locks. In
+					lock_trx_table_locks_remove() we
+					check for this cancel of a transaction's
+					locks and avoid reacquiring the trx
+					mutex to prevent recursive deadlocks.
+					Protected by both the lock sys mutex
+					and the trx_t::mutex. */
+};
+
+#define TRX_MAGIC_N	91118598
+
+/** The transaction handle
+
+Normally, there is a 1:1 relationship between a transaction handle
+(trx) and a session (client connection). One session is associated
+with exactly one user transaction. There are some exceptions to this:
+
+* For DDL operations, a subtransaction is allocated that modifies the
+data dictionary tables. Lock waits and deadlocks are prevented by
+acquiring the dict_operation_lock before starting the subtransaction
+and releasing it after committing the subtransaction.
+
+* The purge system uses a special transaction that is not associated
+with any session.
+
+* If the system crashed or it was quickly shut down while there were
+transactions in the ACTIVE or PREPARED state, these transactions would
+no longer be associated with a session when the server is restarted.
+
+A session may be served by at most one thread at a time. The serving
+thread of a session might change in some MySQL implementations.
+Therefore we do not have os_thread_get_curr_id() assertions in the code.
+
+Normally, only the thread that is currently associated with a running
+transaction may access (read and modify) the trx object, and it may do
+so without holding any mutex. The following are exceptions to this:
+
+* trx_rollback_resurrected() may access resurrected (connectionless)
+transactions while the system is already processing new user
+transactions. The trx_sys->mutex prevents a race condition between it
+and lock_trx_release_locks() [invoked by trx_commit()].
+
+* trx_print_low() may access transactions not associated with the current
+thread. The caller must be holding trx_sys->mutex and lock_sys->mutex.
+
+* When a transaction handle is in the trx_sys->mysql_trx_list or
+trx_sys->trx_list, some of its fields must not be modified without
+holding trx_sys->mutex exclusively.
+
+* The locking code (in particular, lock_deadlock_recursive() and
+lock_rec_convert_impl_to_expl()) will access transactions associated
+to other connections. The locks of transactions are protected by
+lock_sys->mutex and sometimes by trx->mutex. */
+
+struct trx_t{
+	ulint		magic_n;	/*!< NOTE(review): presumably
+					TRX_MAGIC_N while the object is
+					valid; confirm where it is set and
+					checked */
+
+	ib_mutex_t	mutex;		/*!< Mutex protecting the fields
+					state and lock
+					(except some fields of lock, which
+					are protected by lock_sys->mutex) */
+
+	/** State of the trx from the point of view of concurrency control
+	and the valid state transitions.
+
+	Possible states:
+
+	TRX_STATE_NOT_STARTED
+	TRX_STATE_ACTIVE
+	TRX_STATE_PREPARED
+	TRX_STATE_COMMITTED_IN_MEMORY (alias below COMMITTED)
+
+	Valid state transitions are:
+
+	Regular transactions:
+	* NOT_STARTED -> ACTIVE -> COMMITTED -> NOT_STARTED
+
+	Auto-commit non-locking read-only:
+	* NOT_STARTED -> ACTIVE -> NOT_STARTED
+
+	XA (2PC):
+	* NOT_STARTED -> ACTIVE -> PREPARED -> COMMITTED -> NOT_STARTED
+
+	Recovered XA:
+	* NOT_STARTED -> PREPARED -> COMMITTED -> (freed)
+
+	XA (2PC) (shutdown before ROLLBACK or COMMIT):
+	* NOT_STARTED -> PREPARED -> (freed)
+
+	Latching and various transaction lists membership rules:
+
+	XA (2PC) transactions are always treated as non-autocommit.
+
+	Transitions to ACTIVE or NOT_STARTED occur when
+	!in_rw_trx_list and !in_ro_trx_list (no trx_sys->mutex needed).
+
+	Autocommit non-locking read-only transactions move between states
+	without holding any mutex. They are !in_rw_trx_list, !in_ro_trx_list.
+
+	When a transaction is NOT_STARTED, it can be in_mysql_trx_list if
+	it is a user transaction. It cannot be in ro_trx_list or rw_trx_list.
+
+	ACTIVE->PREPARED->COMMITTED is only possible when trx->in_rw_trx_list.
+	The transition ACTIVE->PREPARED is protected by trx_sys->mutex.
+
+	ACTIVE->COMMITTED is possible when the transaction is in
+	ro_trx_list or rw_trx_list.
+
+	Transitions to COMMITTED are protected by both lock_sys->mutex
+	and trx->mutex.
+
+	NOTE: Some of these state change constraints are an overkill,
+	currently only required for a consistent view for printing stats.
+	This unnecessarily adds a huge cost for the general case.
+
+	NOTE: In the future we should add read only transactions to the
+	ro_trx_list the first time they try to acquire a lock ie. by default
+	we treat all read-only transactions as non-locking.  */
+	trx_state_t	state;
+
+	trx_lock_t	lock;		/*!< Information about the transaction
+					locks and state. Protected by
+					trx->mutex or lock_sys->mutex
+					or both */
+	ulint		is_recovered;	/*!< 0=normal transaction,
+					1=recovered, must be rolled back,
+					protected by trx_sys->mutex when
+					trx->in_rw_trx_list holds */
+
+	/* These fields are not protected by any mutex. */
+	const char*	op_info;	/*!< English text describing the
+					current operation, or an empty
+					string */
+	ulint		isolation_level;/*!< TRX_ISO_REPEATABLE_READ, ... */
+	ulint		check_foreigns;	/*!< normally TRUE, but if the user
+					wants to suppress foreign key checks,
+					(in table imports, for example) we
+					set this FALSE */
+	/*------------------------------*/
+	/* MySQL has a transaction coordinator to coordinate two phase
+	commit between multiple storage engines and the binary log. When
+	an engine participates in a transaction, it's responsible for
+	registering itself using the trans_register_ha() API. */
+	unsigned	is_registered:1;/*!< This flag is set to 1 after the
+					transaction has been registered with
+					the coordinator using the XA API, and
+					is set to 0 after commit or rollback. */
+	unsigned	owns_prepare_mutex:1;/*!< 1 if owns prepare mutex, if
+					this is set to 1 then is_registered
+					should also be set to 1. This is used
+					in the XA code */
+	/*------------------------------*/
+	ulint		check_unique_secondary;
+					/*!< normally TRUE, but if the user
+					wants to speed up inserts by
+					suppressing unique key checks
+					for secondary indexes when we decide
+					if we can use the insert buffer for
+					them, we set this FALSE */
+	ulint		support_xa;	/*!< normally we do the XA two-phase
+					commit steps, but by setting this to
+					FALSE, one can save CPU time and about
+					150 bytes in the undo log size as then
+					we skip XA steps */
+	ulint		flush_log_later;/*!< In 2PC, we hold the
+					prepare_commit mutex across
+					both phases. In that case, we
+					defer flush of the logs to disk
+					until after we release the
+					mutex. */
+	ulint		must_flush_log_later;/*!< this flag is set to TRUE in
+					trx_commit() if flush_log_later was
+					TRUE, and there were modifications by
+					the transaction; in that case we must
+					flush the log in
+					trx_commit_complete_for_mysql() */
+	ulint		duplicates;	/*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */
+	ulint		has_search_latch;
+					/*!< TRUE if this trx has latched the
+					search system latch in S-mode */
+	ulint		search_latch_timeout;
+					/*!< If we notice that someone is
+					waiting for our S-lock on the search
+					latch to be released, we wait in
+					row0sel.cc for BTR_SEA_TIMEOUT new
+					searches until we try to keep
+					the search latch again over
+					calls from MySQL; this is intended
+					to reduce contention on the search
+					latch */
+	trx_dict_op_t	dict_operation;	/**< @see enum trx_dict_op_t */
+
+	/* Fields protected by the srv_conc_mutex. */
+	ulint		declared_to_be_inside_innodb;
+					/*!< this is TRUE if we have declared
+					this transaction in
+					srv_conc_enter_innodb to be inside the
+					InnoDB engine */
+	ulint		n_tickets_to_enter_innodb;
+					/*!< this can be > 0 only when
+					declared_to_... is TRUE; when we come
+					to srv_conc_innodb_enter, if the value
+					here is > 0, we decrement this by 1 */
+	ulint		dict_operation_lock_mode;
+					/*!< 0, RW_S_LATCH, or RW_X_LATCH:
+					the latch mode trx currently holds
+					on dict_operation_lock. Protected
+					by dict_operation_lock. */
+
+	trx_id_t	no;		/*!< transaction serialization number:
+					max trx id shortly before the
+					transaction is moved to
+					COMMITTED_IN_MEMORY state.
+					Protected by trx_sys_t::mutex
+					when trx->in_rw_trx_list. Initially
+					set to TRX_ID_MAX. */
+
+	time_t		start_time;	/*!< time the trx state last time became
+					TRX_STATE_ACTIVE */
+	trx_id_t	id;		/*!< transaction id */
+	XID		xid;		/*!< X/Open XA transaction
+					identification to identify a
+					transaction branch */
+	lsn_t		commit_lsn;	/*!< lsn at the time of the commit */
+	table_id_t	table_id;	/*!< Table to drop iff dict_operation
+					== TRX_DICT_OP_TABLE, or 0. */
+	/*------------------------------*/
+	THD*		mysql_thd;	/*!< MySQL thread handle corresponding
+					to this trx, or NULL */
+	const char*	mysql_log_file_name;
+					/*!< if MySQL binlog is used, this field
+					contains a pointer to the latest file
+					name; this is NULL if binlog is not
+					used */
+	ib_int64_t	mysql_log_offset;
+					/*!< if MySQL binlog is used, this
+					field contains the end offset of the
+					binlog entry */
+	/*------------------------------*/
+	ulint		n_mysql_tables_in_use; /*!< number of Innobase tables
+					used in the processing of the current
+					SQL statement in MySQL */
+	ulint		mysql_n_tables_locked;
+					/*!< how many tables the current SQL
+					statement uses, except those
+					in consistent read */
+	/*------------------------------*/
+	UT_LIST_NODE_T(trx_t)
+			trx_list;	/*!< list of transactions;
+					protected by trx_sys->mutex.
+					The same node is used for both
+					trx_sys_t::ro_trx_list and
+					trx_sys_t::rw_trx_list */
+#ifdef UNIV_DEBUG
+	/** The following two fields are mutually exclusive. */
+	/* @{ */
+
+	ibool		in_ro_trx_list;	/*!< TRUE if in trx_sys->ro_trx_list */
+	ibool		in_rw_trx_list;	/*!< TRUE if in trx_sys->rw_trx_list */
+	/* @} */
+#endif /* UNIV_DEBUG */
+	UT_LIST_NODE_T(trx_t)
+			mysql_trx_list;	/*!< list of transactions created for
+					MySQL; protected by trx_sys->mutex */
+#ifdef UNIV_DEBUG
+	ibool		in_mysql_trx_list;
+					/*!< TRUE if in
+					trx_sys->mysql_trx_list */
+#endif /* UNIV_DEBUG */
+	/*------------------------------*/
+	dberr_t		error_state;	/*!< 0 if no error, otherwise error
+					number; NOTE That ONLY the thread
+					doing the transaction is allowed to
+					set this field: this is NOT protected
+					by any mutex */
+	const dict_index_t*error_info;	/*!< if the error number indicates a
+					duplicate key error, a pointer to
+					the problematic index is stored here */
+	ulint		error_key_num;	/*!< if the index creation fails to a
+					duplicate key error, a mysql key
+					number of that index is stored here */
+	sess_t*		sess;		/*!< session of the trx, NULL if none */
+	que_t*		graph;		/*!< query currently run in the session,
+					or NULL if none; NOTE that the query
+					belongs to the session, and it can
+					survive over a transaction commit, if
+					it is a stored procedure with a COMMIT
+					WORK statement, for instance */
+	mem_heap_t*	global_read_view_heap;
+					/*!< memory heap for the global read
+					view */
+	read_view_t*	global_read_view;
+					/*!< consistent read view associated
+					to a transaction or NULL */
+	read_view_t*	read_view;	/*!< consistent read view used in the
+					transaction or NULL, this read view
+					if defined can be normal read view
+					associated to a transaction (i.e.
+					same as global_read_view) or read view
+					associated to a cursor */
+	/*------------------------------*/
+	UT_LIST_BASE_NODE_T(trx_named_savept_t)
+			trx_savepoints;	/*!< savepoints set with SAVEPOINT ...,
+					oldest first */
+	/*------------------------------*/
+	ib_mutex_t	undo_mutex;	/*!< mutex protecting the fields in this
+					section (down to undo_no_arr), EXCEPT
+					last_sql_stat_start, which can be
+					accessed only when we know that there
+					cannot be any activity in the undo
+					logs! */
+	undo_no_t	undo_no;	/*!< next undo log record number to
+					assign; since the undo log is
+					private for a transaction, this
+					is a simple ascending sequence
+					with no gaps; thus it represents
+					the number of modified/inserted
+					rows in a transaction */
+	trx_savept_t	last_sql_stat_start;
+					/*!< undo_no when the last sql statement
+					was started: in case of an error, trx
+					is rolled back down to this undo
+					number; see note at undo_mutex! */
+	trx_rseg_t*	rseg;		/*!< rollback segment assigned to the
+					transaction, or NULL if not assigned
+					yet */
+	trx_undo_t*	insert_undo;	/*!< pointer to the insert undo log, or
+					NULL if no inserts performed yet */
+	trx_undo_t*	update_undo;	/*!< pointer to the update undo log, or
+					NULL if no update performed yet */
+	undo_no_t	roll_limit;	/*!< least undo number to undo during
+					a rollback */
+	ulint		pages_undone;	/*!< number of undo log pages undone
+					since the last undo log truncation */
+	trx_undo_arr_t*	undo_no_arr;	/*!< array of undo numbers of undo log
+					records which are currently processed
+					by a rollback operation */
+	/*------------------------------*/
+	ulint		n_autoinc_rows;	/*!< no. of AUTO-INC rows required for
+					an SQL statement. This is useful for
+					multi-row INSERTs */
+	ib_vector_t*    autoinc_locks;  /*!< AUTOINC locks held by this
+					transaction. Note that these are
+					also in the lock list trx_locks. This
+					vector needs to be freed explicitly
+					when the trx instance is destroyed.
+					Protected by lock_sys->mutex. */
+	/*------------------------------*/
+	ibool		read_only;	/*!< TRUE if transaction is flagged
+					as a READ-ONLY transaction.
+					if !auto_commit || will_lock > 0
+					then it will be added to the list
+					trx_sys_t::ro_trx_list. A read only
+					transaction will not be assigned an
+					UNDO log. Non-locking auto-commit
+					read-only transaction will not be on
+					either list. */
+	ibool		auto_commit;	/*!< TRUE if it is an autocommit */
+	ulint		will_lock;	/*!< Will acquire some locks. Increment
+					each time we determine that a lock will
+					be acquired by the MySQL layer. */
+	bool		ddl;		/*!< true if it is a transaction that
+					is being started for a DDL operation */
+	/*------------------------------*/
+	fts_trx_t*	fts_trx;	/*!< FTS information, or NULL if
+					transaction hasn't modified tables
+					with FTS indexes (yet). */
+	doc_id_t	fts_next_doc_id;/*!< The document id used for updates */
+	/*------------------------------*/
+	ulint		flush_tables;	/*!< if "covering" the FLUSH TABLES,
+					count of tables being flushed. */
+
+	/*------------------------------*/
+#ifdef UNIV_DEBUG
+	ulint		start_line;	/*!< Track where it was started from */
+	const char*	start_file;	/*!< Filename where it was started */
+#endif /* UNIV_DEBUG */
+	/*------------------------------*/
+	bool		api_trx;	/*!< trx started by InnoDB API */
+	bool		api_auto_commit;/*!< automatic commit */
+	bool		read_write;	/*!< if read and write operation */
+
+	/*------------------------------*/
+	char detailed_error[256];	/*!< detailed error message for last
+					error, or empty. */
+};
+
+/* Transaction isolation levels (trx->isolation_level) */
+#define TRX_ISO_READ_UNCOMMITTED	0	/* dirty read: non-locking
+						SELECTs are performed so that
+						we do not look at a possible
+						earlier version of a record;
+						thus they are not 'consistent'
+						reads under this isolation
+						level; otherwise like level
+						2 */
+
+#define TRX_ISO_READ_COMMITTED		1	/* somewhat Oracle-like
+						isolation, except that in
+						range UPDATE and DELETE we
+						must block phantom rows
+						with next-key locks;
+						SELECT ... FOR UPDATE and ...
+						LOCK IN SHARE MODE only lock
+						the index records, NOT the
+						gaps before them, and thus
+						allow free inserting;
+						each consistent read reads its
+						own snapshot */
+
+#define TRX_ISO_REPEATABLE_READ		2	/* this is the default;
+						all consistent reads in the
+						same trx read the same
+						snapshot;
+						full next-key locking used
+						in locking reads to block
+						insertions into gaps */
+
+#define TRX_ISO_SERIALIZABLE		3	/* all plain SELECTs are
+						converted to LOCK IN SHARE
+						MODE reads */
+
+/* Treatment of duplicate values (trx->duplicates; for example, in inserts).
+Multiple flags can be combined with bitwise OR. */
+#define TRX_DUP_IGNORE	1	/* duplicate rows are to be updated */
+#define TRX_DUP_REPLACE	2	/* duplicate rows are to be replaced */
+
+
+/* Types of a trx signal */
+#define TRX_SIG_NO_SIGNAL		0
+#define TRX_SIG_TOTAL_ROLLBACK		1
+#define TRX_SIG_ROLLBACK_TO_SAVEPT	2
+#define TRX_SIG_COMMIT			3
+#define TRX_SIG_BREAK_EXECUTION		5
+
+/* Sender types of a signal */
+#define TRX_SIG_SELF		0	/* sent by the session itself, or
+					by an error occurring within this
+					session */
+#define TRX_SIG_OTHER_SESS	1	/* sent by another session (which
+					must hold rights to this) */
+
+/** Commit node states; the execution state stored in
+commit_node_t::state. The numbering starts at 1, so COMMIT_NODE_WAIT
+is implicitly 2 and the value 0 is not a valid state. */
+enum commit_node_state {
+	COMMIT_NODE_SEND = 1,	/*!< about to send a commit signal to
+				the transaction */
+	COMMIT_NODE_WAIT	/*!< commit signal sent to the transaction,
+				waiting for completion */
+};
+
+/** Commit command node in a query graph; the type returned by
+trx_commit_node_create(). */
+struct commit_node_t{
+	que_common_t	common;	/*!< node type: QUE_NODE_COMMIT */
+	enum commit_node_state
+			state;	/*!< node execution state: COMMIT_NODE_SEND
+				or COMMIT_NODE_WAIT */
+};
+
+
+/** Test if trx->mutex is owned.
+The argument is parenthesized in the expansion so that any pointer
+expression (not only a plain identifier) expands correctly.
+@param t	transaction
+@return	the result of mutex_own() on the transaction mutex */
+#define trx_mutex_own(t) mutex_own(&(t)->mutex)
+
+/** Acquire the trx->mutex.
+The argument is parenthesized in the expansion so that any pointer
+expression (not only a plain identifier) expands correctly.
+@param t	transaction */
+#define trx_mutex_enter(t) do {			\
+	mutex_enter(&(t)->mutex);		\
+} while (0)
+
+/** Release the trx->mutex.
+The argument is parenthesized in the expansion so that any pointer
+expression (not only a plain identifier) expands correctly.
+@param t	transaction */
+#define trx_mutex_exit(t) do {			\
+	mutex_exit(&(t)->mutex);		\
+} while (0)
+
+/** @brief The latch protecting the adaptive search system
+
+This latch protects the
+(1) hash index;
+(2) columns of a record to which we have a pointer in the hash index;
+
+but does NOT protect:
+
+(3) next record offset field in a record;
+(4) next or previous records on the same page.
+
+Bear in mind (3) and (4) when using the hash index.
+*/
+extern rw_lock_t*	btr_search_latch_temp;
+
+/** The latch protecting the adaptive search system */
+#define btr_search_latch	(*btr_search_latch_temp)
+
+#ifndef UNIV_NONINL
+#include "trx0trx.ic"
+#endif
+#endif /* !UNIV_HOTBACKUP */
+
+#endif
diff --git a/storage/innobase/include/trx0trx.ic b/storage/innobase/include/trx0trx.ic
new file mode 100644
index 00000000000..69ee17ea98b
--- /dev/null
+++ b/storage/innobase/include/trx0trx.ic
@@ -0,0 +1,180 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0trx.ic
+The transaction
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+/**********************************************************************//**
+Checks whether a transaction is in the given state.
+The caller must either hold trx_sys->mutex or be the thread that is
+serving a running transaction.
+A running transaction must be in trx_sys->ro_trx_list or trx_sys->rw_trx_list
+unless it is a non-locking autocommit read only transaction, which is only
+in trx_sys->mysql_trx_list.
+@return	TRUE if trx->state == state */
+UNIV_INLINE
+ibool
+trx_state_eq(
+/*=========*/
+	const trx_t*	trx,	/*!< in: transaction */
+	trx_state_t	state)	/*!< in: state;
+				if state != TRX_STATE_NOT_STARTED
+				asserts that
+				trx->state != TRX_STATE_NOT_STARTED */
+{
+#ifdef UNIV_DEBUG
+	/* Debug builds run the consistency checks that belong to the
+	current state of the transaction before answering. */
+	switch (trx->state) {
+	case TRX_STATE_NOT_STARTED:
+		/* This state is not allowed for running transactions. */
+		ut_a(state == TRX_STATE_NOT_STARTED);
+		ut_ad(!trx->in_rw_trx_list);
+		ut_ad(!trx->in_ro_trx_list);
+		return(trx->state == state);
+	case TRX_STATE_ACTIVE:
+		assert_trx_nonlocking_or_in_list(trx);
+		return(trx->state == state);
+	case TRX_STATE_PREPARED:
+		ut_ad(!trx_is_autocommit_non_locking(trx));
+		return(trx->state == state);
+	case TRX_STATE_COMMITTED_IN_MEMORY:
+		assert_trx_in_list(trx);
+		return(trx->state == state);
+	}
+	/* trx->state matched none of the cases above */
+	ut_error;
+#endif /* UNIV_DEBUG */
+	return(trx->state == state);
+}
+
+/****************************************************************//**
+Retrieves the error_info field from a trx.
+@return	the error info, i.e. the current value of trx->error_info */
+UNIV_INLINE
+const dict_index_t*
+trx_get_error_info(
+/*===============*/
+	const trx_t*	trx)	/*!< in: trx object */
+{
+	return(trx->error_info);
+}
+
+/*******************************************************************//**
+Maps the que state of a transaction to a human readable name. The result
+points to a string literal and must not be free()'d or modified.
+@return	string in the data segment */
+UNIV_INLINE
+const char*
+trx_get_que_state_str(
+/*==================*/
+	const trx_t*	trx)	/*!< in: transaction */
+{
+	const char*	state_str;
+
+	/* be sure to adjust TRX_QUE_STATE_STR_MAX_LEN if you change this */
+	switch (trx->lock.que_state) {
+	case TRX_QUE_RUNNING:		state_str = "RUNNING";		break;
+	case TRX_QUE_LOCK_WAIT:		state_str = "LOCK WAIT";	break;
+	case TRX_QUE_ROLLING_BACK:	state_str = "ROLLING BACK";	break;
+	case TRX_QUE_COMMITTING:	state_str = "COMMITTING";	break;
+	default:			state_str = "UNKNOWN";		break;
+	}
+
+	/* every branch of the switch has assigned state_str */
+	return(state_str);
+}
+
+/**********************************************************************//**
+Reads the dictionary operation mode recorded in a transaction.
+@return	dictionary operation mode */
+UNIV_INLINE
+enum trx_dict_op_t
+trx_get_dict_operation(
+/*===================*/
+	const trx_t*	trx)	/*!< in: transaction */
+{
+	const trx_dict_op_t	mode
+		= static_cast<trx_dict_op_t>(trx->dict_operation);
+
+#ifdef UNIV_DEBUG
+	/* Debug builds reject any value that is not a known mode. */
+	if (mode != TRX_DICT_OP_NONE
+	    && mode != TRX_DICT_OP_TABLE
+	    && mode != TRX_DICT_OP_INDEX) {
+		ut_error;
+	}
+#endif /* UNIV_DEBUG */
+	return(mode);
+}
+/**********************************************************************//**
+Flag a transaction as a dictionary operation. */
+UNIV_INLINE
+void
+trx_set_dict_operation(
+/*===================*/
+	trx_t*			trx,	/*!< in/out: transaction */
+	enum trx_dict_op_t	op)	/*!< in: operation, not
+					TRX_DICT_OP_NONE */
+{
+#ifdef UNIV_DEBUG
+	/* Debug builds validate the transition from the mode currently
+	recorded in trx to the requested one. */
+	const enum trx_dict_op_t	prev_op = trx_get_dict_operation(trx);
+
+	if (op == TRX_DICT_OP_NONE) {
+		/* The mode can not be cleared through this function. */
+		ut_error;
+	} else if (op == TRX_DICT_OP_TABLE) {
+		/* Every known previous mode may be followed by
+		TRX_DICT_OP_TABLE. */
+		if (prev_op != TRX_DICT_OP_NONE
+		    && prev_op != TRX_DICT_OP_INDEX
+		    && prev_op != TRX_DICT_OP_TABLE) {
+			ut_error;
+		}
+	} else if (op == TRX_DICT_OP_INDEX) {
+		/* TRX_DICT_OP_INDEX is expected only when no mode has
+		been recorded yet. */
+		ut_ad(prev_op == TRX_DICT_OP_NONE);
+	}
+#endif /* UNIV_DEBUG */
+
+	trx->ddl = true;
+	trx->dict_operation = op;
+}
+
+/********************************************************************//**
+Unlocks btr_search_latch in S mode if trx is flagged as holding it. */
+UNIV_INLINE
+void
+trx_search_latch_release_if_reserved(
+/*=================================*/
+	trx_t*	trx)	/*!< in/out: transaction */
+{
+	if (!trx->has_search_latch) {
+		return;	/* nothing to release */
+	}
+	rw_lock_s_unlock(&btr_search_latch);
+	trx->has_search_latch = FALSE;
+}
+
diff --git a/storage/innobase/include/trx0types.h b/storage/innobase/include/trx0types.h
new file mode 100644
index 00000000000..7ca95131328
--- /dev/null
+++ b/storage/innobase/include/trx0types.h
@@ -0,0 +1,147 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0types.h
+Transaction system global type definitions
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef trx0types_h
+#define trx0types_h
+
+#include "ut0byte.h"
+
+/** printf(3) format used for printing DB_TRX_ID and other system fields */
+#define TRX_ID_FMT		IB_ID_FMT
+
+/** maximum length that a formatted trx_t::id could take, not including
+the terminating NUL character. */
+#define TRX_ID_MAX_LEN		17
+
+/** Transaction execution states when trx->state == TRX_STATE_ACTIVE */
+enum trx_que_t {
+	TRX_QUE_RUNNING,		/*!< transaction is running */
+	TRX_QUE_LOCK_WAIT,		/*!< transaction is waiting for
+					a lock */
+	TRX_QUE_ROLLING_BACK,		/*!< transaction is rolling back */
+	TRX_QUE_COMMITTING		/*!< transaction is committing */
+};
+
+/** Transaction states (trx_t::state) */
+enum trx_state_t {
+	TRX_STATE_NOT_STARTED,			/* not a running transaction */
+	TRX_STATE_ACTIVE,			/* substates: trx_que_t */
+	TRX_STATE_PREPARED,			/* Support for 2PC/XA */
+	TRX_STATE_COMMITTED_IN_MEMORY
+};
+
+/** Type of data dictionary operation */
+enum trx_dict_op_t {
+	/** The transaction is not modifying the data dictionary. */
+	TRX_DICT_OP_NONE = 0,
+	/** The transaction is creating a table or an index, or
+	dropping a table.  The table must be dropped in crash
+	recovery.  This and TRX_DICT_OP_NONE are the only possible
+	operation modes in crash recovery. */
+	TRX_DICT_OP_TABLE = 1,
+	/** The transaction is creating or dropping an index in an
+	existing table.  In crash recovery, the data dictionary
+	must be locked, but the table must not be dropped. */
+	TRX_DICT_OP_INDEX = 2
+};
+
+/** Memory objects */
+/* @{ */
+/** Transaction */
+struct trx_t;
+/** The locks and state of an active transaction */
+struct trx_lock_t;
+/** Transaction system */
+struct trx_sys_t;
+/** Signal */
+struct trx_sig_t;
+/** Rollback segment */
+struct trx_rseg_t;
+/** Transaction undo log */
+struct trx_undo_t;
+/** Array of undo numbers of undo records being rolled back or purged */
+struct trx_undo_arr_t;
+/** A cell of trx_undo_arr_t */
+struct trx_undo_inf_t;
+/** The control structure used in the purge operation */
+struct trx_purge_t;
+/** Rollback command node in a query graph */
+struct roll_node_t;
+/** Commit command node in a query graph */
+struct commit_node_t;
+/** SAVEPOINT command node in a query graph */
+struct trx_named_savept_t;
+/* @} */
+
+/** Rollback contexts */
+enum trx_rb_ctx {
+	RB_NONE = 0,	/*!< no rollback */
+	RB_NORMAL,	/*!< normal rollback */
+	RB_RECOVERY_PURGE_REC,
+			/*!< rolling back an incomplete transaction,
+			in crash recovery, rolling back an
+			INSERT that was performed by updating a
+			delete-marked record; if the delete-marked record
+			no longer exists in an active read view, it will
+			be purged */
+	RB_RECOVERY	/*!< rolling back an incomplete transaction,
+			in crash recovery */
+};
+
+/** Row identifier (DB_ROW_ID, DATA_ROW_ID) */
+typedef ib_id_t	row_id_t;
+/** Transaction identifier (DB_TRX_ID, DATA_TRX_ID) */
+typedef ib_id_t	trx_id_t;
+/** Rollback pointer (DB_ROLL_PTR, DATA_ROLL_PTR) */
+typedef ib_id_t	roll_ptr_t;
+/** Undo number */
+typedef ib_id_t	undo_no_t;
+
+/** Maximum transaction identifier */
+#define TRX_ID_MAX	IB_ID_MAX
+
+/** Transaction savepoint */
+struct trx_savept_t{
+	undo_no_t	least_undo_no;	/*!< least undo number to undo */
+};
+
+/** File objects */
+/* @{ */
+/** Transaction system header */
+typedef byte	trx_sysf_t;
+/** Rollback segment header */
+typedef byte	trx_rsegf_t;
+/** Undo segment header */
+typedef byte	trx_usegf_t;
+/** Undo log header */
+typedef byte	trx_ulogf_t;
+/** Undo log page header */
+typedef byte	trx_upagef_t;
+
+/** Undo log record */
+typedef	byte	trx_undo_rec_t;
+/* @} */
+
+#endif
diff --git a/storage/innobase/include/trx0undo.h b/storage/innobase/include/trx0undo.h
new file mode 100644
index 00000000000..61b0dabb1e6
--- /dev/null
+++ b/storage/innobase/include/trx0undo.h
@@ -0,0 +1,604 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0undo.h
+Transaction undo log
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef trx0undo_h
+#define trx0undo_h
+
+#include "univ.i"
+#include "trx0types.h"
+#include "mtr0mtr.h"
+#include "trx0sys.h"
+#include "page0types.h"
+#include "trx0xa.h"
+
+#ifndef UNIV_HOTBACKUP
+/***********************************************************************//**
+Builds a roll pointer.
+@return	roll pointer */
+UNIV_INLINE
+roll_ptr_t
+trx_undo_build_roll_ptr(
+/*====================*/
+	ibool	is_insert,	/*!< in: TRUE if insert undo log */
+	ulint	rseg_id,	/*!< in: rollback segment id */
+	ulint	page_no,	/*!< in: page number */
+	ulint	offset);	/*!< in: offset of the undo entry within page */
+/***********************************************************************//**
+Decodes a roll pointer. */
+UNIV_INLINE
+void
+trx_undo_decode_roll_ptr(
+/*=====================*/
+	roll_ptr_t	roll_ptr,	/*!< in: roll pointer */
+	ibool*		is_insert,	/*!< out: TRUE if insert undo log */
+	ulint*		rseg_id,	/*!< out: rollback segment id */
+	ulint*		page_no,	/*!< out: page number */
+	ulint*		offset);	/*!< out: offset of the undo
+					entry within page */
+/***********************************************************************//**
+Returns TRUE if the roll pointer is of the insert type.
+@return	TRUE if insert undo log */
+UNIV_INLINE
+ibool
+trx_undo_roll_ptr_is_insert(
+/*========================*/
+	roll_ptr_t	roll_ptr);	/*!< in: roll pointer */
+/***********************************************************************//**
+Returns true if the record is of the insert type.
+@return	true if the record was freshly inserted (not updated). */
+UNIV_INLINE
+bool
+trx_undo_trx_id_is_insert(
+/*======================*/
+	const byte*	trx_id)	/*!< in: DB_TRX_ID, followed by DB_ROLL_PTR */
+	__attribute__((nonnull, pure, warn_unused_result));
+#endif /* !UNIV_HOTBACKUP */
+/*****************************************************************//**
+Writes a roll ptr to an index page. In case that the size changes in
+some future version, this function should be used instead of
+mach_write_... */
+UNIV_INLINE
+void
+trx_write_roll_ptr(
+/*===============*/
+	byte*		ptr,		/*!< in: pointer to memory where
+					written */
+	roll_ptr_t	roll_ptr);	/*!< in: roll ptr */
+/*****************************************************************//**
+Reads a roll ptr from an index page. In case that the roll ptr size
+changes in some future version, this function should be used instead of
+mach_read_...
+@return	roll ptr */
+UNIV_INLINE
+roll_ptr_t
+trx_read_roll_ptr(
+/*==============*/
+	const byte*	ptr);	/*!< in: pointer to memory from where to read */
+#ifndef UNIV_HOTBACKUP
+/******************************************************************//**
+Gets an undo log page and x-latches it.
+@return	pointer to page x-latched */
+UNIV_INLINE
+page_t*
+trx_undo_page_get(
+/*==============*/
+	ulint	space,		/*!< in: space where placed */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	page_no,	/*!< in: page number */
+	mtr_t*	mtr);		/*!< in: mtr */
+/******************************************************************//**
+Gets an undo log page and s-latches it.
+@return	pointer to page s-latched */
+UNIV_INLINE
+page_t*
+trx_undo_page_get_s_latched(
+/*========================*/
+	ulint	space,		/*!< in: space where placed */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	page_no,	/*!< in: page number */
+	mtr_t*	mtr);		/*!< in: mtr */
+/******************************************************************//**
+Returns the previous undo record on the page in the specified log, or
+NULL if none exists.
+@return	pointer to record, NULL if none */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_page_get_prev_rec(
+/*=======================*/
+	trx_undo_rec_t*	rec,	/*!< in: undo log record */
+	ulint		page_no,/*!< in: undo log header page number */
+	ulint		offset);/*!< in: undo log header offset on page */
+/******************************************************************//**
+Returns the next undo log record on the page in the specified log, or
+NULL if none exists.
+@return	pointer to record, NULL if none */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_page_get_next_rec(
+/*=======================*/
+	trx_undo_rec_t*	rec,	/*!< in: undo log record */
+	ulint		page_no,/*!< in: undo log header page number */
+	ulint		offset);/*!< in: undo log header offset on page */
+/******************************************************************//**
+Returns the last undo record on the page in the specified undo log, or
+NULL if none exists.
+@return	pointer to record, NULL if none */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_page_get_last_rec(
+/*=======================*/
+	page_t*	undo_page,/*!< in: undo log page */
+	ulint	page_no,/*!< in: undo log header page number */
+	ulint	offset);	/*!< in: undo log header offset on page */
+/******************************************************************//**
+Returns the first undo record on the page in the specified undo log, or
+NULL if none exists.
+@return	pointer to record, NULL if none */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_page_get_first_rec(
+/*========================*/
+	page_t*	undo_page,/*!< in: undo log page */
+	ulint	page_no,/*!< in: undo log header page number */
+	ulint	offset);/*!< in: undo log header offset on page */
+/***********************************************************************//**
+Gets the previous record in an undo log.
+@return	undo log record, the page latched per 'shared', NULL if none */
+UNIV_INTERN
+trx_undo_rec_t*
+trx_undo_get_prev_rec(
+/*==================*/
+	trx_undo_rec_t*	rec,	/*!< in: undo record */
+	ulint		page_no,/*!< in: undo log header page number */
+	ulint		offset,	/*!< in: undo log header offset on page */
+	bool		shared,	/*!< in: true=S-latch, false=X-latch */
+	mtr_t*		mtr);	/*!< in: mtr */
+/***********************************************************************//**
+Gets the next record in an undo log.
+@return	undo log record, the page s-latched, NULL if none */
+UNIV_INTERN
+trx_undo_rec_t*
+trx_undo_get_next_rec(
+/*==================*/
+	trx_undo_rec_t*	rec,	/*!< in: undo record */
+	ulint		page_no,/*!< in: undo log header page number */
+	ulint		offset,	/*!< in: undo log header offset on page */
+	mtr_t*		mtr);	/*!< in: mtr */
+/***********************************************************************//**
+Gets the first record in an undo log.
+@return	undo log record, the page latched, NULL if none */
+UNIV_INTERN
+trx_undo_rec_t*
+trx_undo_get_first_rec(
+/*===================*/
+	ulint	space,	/*!< in: undo log header space */
+	ulint	zip_size,/*!< in: compressed page size in bytes
+			or 0 for uncompressed pages */
+	ulint	page_no,/*!< in: undo log header page number */
+	ulint	offset,	/*!< in: undo log header offset on page */
+	ulint	mode,	/*!< in: latching mode: RW_S_LATCH or RW_X_LATCH */
+	mtr_t*	mtr);	/*!< in: mtr */
+/********************************************************************//**
+Tries to add a page to the undo log segment where the undo log is placed.
+@return	X-latched block if success, else NULL */
+UNIV_INTERN
+buf_block_t*
+trx_undo_add_page(
+/*==============*/
+	trx_t*		trx,	/*!< in: transaction */
+	trx_undo_t*	undo,	/*!< in: undo log memory object */
+	mtr_t*		mtr)	/*!< in: mtr which does not have a latch to any
+				undo log page; the caller must have reserved
+				the rollback segment mutex */
+	__attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Frees the last undo log page.
+The caller must hold the rollback segment mutex. */
+UNIV_INTERN
+void
+trx_undo_free_last_page_func(
+/*==========================*/
+#ifdef UNIV_DEBUG
+	const trx_t*	trx,	/*!< in: transaction */
+#endif /* UNIV_DEBUG */
+	trx_undo_t*	undo,	/*!< in/out: undo log memory copy */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction which does not
+				have a latch to any undo log page or which
+				has allocated the undo log page */
+	__attribute__((nonnull));
+#ifdef UNIV_DEBUG
+# define trx_undo_free_last_page(trx,undo,mtr)	\
+	trx_undo_free_last_page_func(trx,undo,mtr)
+#else /* UNIV_DEBUG */
+# define trx_undo_free_last_page(trx,undo,mtr)	\
+	trx_undo_free_last_page_func(undo,mtr)
+#endif /* UNIV_DEBUG */
+
+/***********************************************************************//**
+Truncates an undo log from the end. This function is used during a rollback
+to free space from an undo log. */
+UNIV_INTERN
+void
+trx_undo_truncate_end_func(
+/*=======================*/
+#ifdef UNIV_DEBUG
+	const trx_t*	trx,	/*!< in: transaction whose undo log it is */
+#endif /* UNIV_DEBUG */
+	trx_undo_t*	undo,	/*!< in/out: undo log */
+	undo_no_t	limit)	/*!< in: all undo records with undo number
+				>= this value should be truncated */
+	__attribute__((nonnull));
+#ifdef UNIV_DEBUG
+# define trx_undo_truncate_end(trx,undo,limit)		\
+	trx_undo_truncate_end_func(trx,undo,limit)
+#else /* UNIV_DEBUG */
+# define trx_undo_truncate_end(trx,undo,limit)		\
+	trx_undo_truncate_end_func(undo,limit)
+#endif /* UNIV_DEBUG */
+
+/***********************************************************************//**
+Truncates an undo log from the start. This function is used during a purge
+operation. */
+UNIV_INTERN
+void
+trx_undo_truncate_start(
+/*====================*/
+	trx_rseg_t*	rseg,		/*!< in: rollback segment */
+	ulint		space,		/*!< in: space id of the log */
+	ulint		hdr_page_no,	/*!< in: header page number */
+	ulint		hdr_offset,	/*!< in: header offset on the page */
+	undo_no_t	limit);		/*!< in: all undo pages with
+					undo numbers < this value
+					should be truncated; NOTE that
+					the function only frees whole
+					pages; the header page is not
+					freed, but emptied, if all the
+					records there are < limit */
+/********************************************************************//**
+Initializes the undo log lists for a rollback segment memory copy.
+This function is only called when the database is started or a new
+rollback segment created.
+@return	the combined size of undo log segments in pages */
+UNIV_INTERN
+ulint
+trx_undo_lists_init(
+/*================*/
+	trx_rseg_t*	rseg);	/*!< in: rollback segment memory object */
+/**********************************************************************//**
+Assigns an undo log for a transaction. A new undo log is created or a cached
+undo log reused.
+@return DB_SUCCESS if undo log assign successful, possible error codes
+are: DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE DB_READ_ONLY
+DB_OUT_OF_MEMORY */
+UNIV_INTERN
+dberr_t
+trx_undo_assign_undo(
+/*=================*/
+	trx_t*		trx,	/*!< in: transaction */
+	ulint		type)	/*!< in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
+	__attribute__((nonnull, warn_unused_result));
+/******************************************************************//**
+Sets the state of the undo log segment at a transaction finish.
+@return	undo log segment header page, x-latched */
+UNIV_INTERN
+page_t*
+trx_undo_set_state_at_finish(
+/*=========================*/
+	trx_undo_t*	undo,	/*!< in: undo log memory copy */
+	mtr_t*		mtr);	/*!< in: mtr */
+/******************************************************************//**
+Sets the state of the undo log segment at a transaction prepare.
+@return	undo log segment header page, x-latched */
+UNIV_INTERN
+page_t*
+trx_undo_set_state_at_prepare(
+/*==========================*/
+	trx_t*		trx,	/*!< in: transaction */
+	trx_undo_t*	undo,	/*!< in: undo log memory copy */
+	mtr_t*		mtr);	/*!< in: mtr */
+
+/**********************************************************************//**
+Adds the update undo log header as the first in the history list, and
+frees the memory object, or puts it to the list of cached update undo log
+segments. */
+UNIV_INTERN
+void
+trx_undo_update_cleanup(
+/*====================*/
+	trx_t*	trx,		/*!< in: trx owning the update undo log */
+	page_t*	undo_page,	/*!< in: update undo log header page,
+				x-latched */
+	mtr_t*	mtr);		/*!< in: mtr */
+/******************************************************************//**
+Frees or caches an insert undo log after a transaction commit or rollback.
+Knowledge of inserts is not needed after a commit or rollback, therefore
+the data can be discarded. */
+UNIV_INTERN
+void
+trx_undo_insert_cleanup(
+/*====================*/
+	trx_t*	trx);	/*!< in: transaction handle */
+
+/********************************************************************//**
+At shutdown, frees the undo logs of a PREPARED transaction. */
+UNIV_INTERN
+void
+trx_undo_free_prepared(
+/*===================*/
+	trx_t*	trx)	/*!< in/out: PREPARED transaction */
+	UNIV_COLD __attribute__((nonnull));
+#endif /* !UNIV_HOTBACKUP */
+/***********************************************************//**
+Parses the redo log entry of an undo log page initialization.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+trx_undo_parse_page_init(
+/*=====================*/
+	byte*	ptr,	/*!< in: buffer */
+	byte*	end_ptr,/*!< in: buffer end */
+	page_t*	page,	/*!< in: page or NULL */
+	mtr_t*	mtr);	/*!< in: mtr or NULL */
+/***********************************************************//**
+Parses the redo log entry of an undo log page header create or reuse.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+trx_undo_parse_page_header(
+/*=======================*/
+	ulint	type,	/*!< in: MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE */
+	byte*	ptr,	/*!< in: buffer */
+	byte*	end_ptr,/*!< in: buffer end */
+	page_t*	page,	/*!< in: page or NULL */
+	mtr_t*	mtr);	/*!< in: mtr or NULL */
+/***********************************************************//**
+Parses the redo log entry of an undo log page header discard.
+@return	end of log record or NULL */
+UNIV_INTERN
+byte*
+trx_undo_parse_discard_latest(
+/*==========================*/
+	byte*	ptr,	/*!< in: buffer */
+	byte*	end_ptr,/*!< in: buffer end */
+	page_t*	page,	/*!< in: page or NULL */
+	mtr_t*	mtr);	/*!< in: mtr or NULL */
+/********************************************************************//**
+Frees an undo log memory copy. */
+UNIV_INTERN
+void
+trx_undo_mem_free(
+/*==============*/
+	trx_undo_t*	undo);		/*!< in: the undo object to be freed */
+
+/* Types of an undo log segment */
+#define	TRX_UNDO_INSERT		1	/* contains undo entries for inserts */
+#define	TRX_UNDO_UPDATE		2	/* contains undo entries for updates
+					and delete markings: in short,
+					modifications (the name 'UPDATE' is
+					a historical relic) */
+/* States of an undo log segment */
+#define TRX_UNDO_ACTIVE		1	/* contains an undo log of an active
+					transaction */
+#define	TRX_UNDO_CACHED		2	/* cached for quick reuse */
+#define	TRX_UNDO_TO_FREE	3	/* insert undo segment can be freed */
+#define	TRX_UNDO_TO_PURGE	4	/* update undo segment will not be
+					reused: it can be freed in purge when
+					all undo data in it is removed */
+#define	TRX_UNDO_PREPARED	5	/* contains an undo log of a
+					prepared transaction */
+
+#ifndef UNIV_HOTBACKUP
+/** Transaction undo log memory object; this is protected by the undo_mutex
+in the corresponding transaction object */
+
+struct trx_undo_t{
+	/*-----------------------------*/
+	ulint		id;		/*!< undo log slot number within the
+					rollback segment */
+	ulint		type;		/*!< TRX_UNDO_INSERT or
+					TRX_UNDO_UPDATE */
+	ulint		state;		/*!< state of the corresponding undo log
+					segment: TRX_UNDO_ACTIVE, ... */
+	ibool		del_marks;	/*!< relevant only in an update undo
+					log: this is TRUE if the transaction may
+					have delete marked records, because of
+					a delete of a row or an update of an
+					indexed field; purge is then
+					necessary; also TRUE if the transaction
+					has updated an externally stored
+					field */
+	trx_id_t	trx_id;		/*!< id of the trx assigned to the undo
+					log */
+	XID		xid;		/*!< X/Open XA transaction
+					identification */
+	ibool		dict_operation;	/*!< TRUE if a dict operation trx */
+	table_id_t	table_id;	/*!< if a dict operation, then the table
+					id */
+	trx_rseg_t*	rseg;		/*!< rseg where the undo log belongs */
+	/*-----------------------------*/
+	ulint		space;		/*!< space id where the undo log
+					placed */
+	ulint		zip_size;	/*!< compressed page size of space
+					in bytes, or 0 for uncompressed */
+	ulint		hdr_page_no;	/*!< page number of the header page in
+					the undo log */
+	ulint		hdr_offset;	/*!< header offset of the undo log on
+					the page */
+	ulint		last_page_no;	/*!< page number of the last page in the
+					undo log; this may differ from
+					top_page_no during a rollback */
+	ulint		size;		/*!< current size in pages */
+	/*-----------------------------*/
+	ulint		empty;		/*!< boolean flag: TRUE if the stack of
+					undo log records is currently empty */
+	ulint		top_page_no;	/*!< page number where the latest undo
+					log record was catenated; during
+					rollback the page from which the latest
+					undo record was chosen */
+	ulint		top_offset;	/*!< offset of the latest undo record,
+					i.e., the topmost element in the undo
+					log if we think of it as a stack */
+	undo_no_t	top_undo_no;	/*!< undo number of the latest record */
+	buf_block_t*	guess_block;	/*!< guess for the buffer block where
+					the top page might reside */
+	/*-----------------------------*/
+	UT_LIST_NODE_T(trx_undo_t) undo_list;
+					/*!< undo log objects in the rollback
+					segment are chained into lists */
+};
+#endif /* !UNIV_HOTBACKUP */
+
+/** The offset of the undo log page header on pages of the undo log */
+#define	TRX_UNDO_PAGE_HDR	FSEG_PAGE_DATA
+/*-------------------------------------------------------------*/
+/** Transaction undo log page header offsets */
+/* @{ */
+#define	TRX_UNDO_PAGE_TYPE	0	/*!< TRX_UNDO_INSERT or
+					TRX_UNDO_UPDATE */
+#define	TRX_UNDO_PAGE_START	2	/*!< Byte offset where the undo log
+					records for the LATEST transaction
+					start on this page (remember that
+					in an update undo log, the first page
+					can contain several undo logs) */
+#define	TRX_UNDO_PAGE_FREE	4	/*!< On each page of the undo log this
+					field contains the byte offset of the
+					first free byte on the page */
+#define TRX_UNDO_PAGE_NODE	6	/*!< The file list node in the chain
+					of undo log pages */
+/*-------------------------------------------------------------*/
+#define TRX_UNDO_PAGE_HDR_SIZE	(6 + FLST_NODE_SIZE)
+					/*!< Size of the transaction undo
+					log page header, in bytes */
+/* @} */
+
+/** An update undo segment with just one page can be reused if it has
+at most this many bytes used; we must leave space at least for one new undo
+log header on the page */
+
+#define TRX_UNDO_PAGE_REUSE_LIMIT	(3 * UNIV_PAGE_SIZE / 4)
+
+/* An update undo log segment may contain several undo logs on its first page
+if the undo logs took so little space that the segment could be cached and
+reused. All the undo log headers are then on the first page, and the last one
+owns the undo log records on subsequent pages if the segment is bigger than
+one page. If an undo log is stored in a segment, then on the first page it is
+allowed to have zero undo records, but if the segment extends to several
+pages, then all the rest of the pages must contain at least one undo log
+record. */
+
+/** The offset of the undo log segment header on the first page of the undo
+log segment */
+
+#define	TRX_UNDO_SEG_HDR	(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE)
+/** Undo log segment header */
+/* @{ */
+/*-------------------------------------------------------------*/
+#define	TRX_UNDO_STATE		0	/*!< TRX_UNDO_ACTIVE, ... */
+#define	TRX_UNDO_LAST_LOG	2	/*!< Offset of the last undo log header
+					on the segment header page, 0 if
+					none */
+#define	TRX_UNDO_FSEG_HEADER	4	/*!< Header for the file segment which
+					the undo log segment occupies */
+#define	TRX_UNDO_PAGE_LIST	(4 + FSEG_HEADER_SIZE)
+					/*!< Base node for the list of pages in
+					the undo log segment; defined only on
+					the undo log segment's first page */
+/*-------------------------------------------------------------*/
+/** Size of the undo log segment header */
+#define TRX_UNDO_SEG_HDR_SIZE	(4 + FSEG_HEADER_SIZE + FLST_BASE_NODE_SIZE)
+/* @} */
+
+
+/** The undo log header. There can be several undo log headers on the first
+page of an update undo log segment. */
+/* @{ */
+/*-------------------------------------------------------------*/
+#define	TRX_UNDO_TRX_ID		0	/*!< Transaction id */
+#define	TRX_UNDO_TRX_NO		8	/*!< Transaction number of the
+					transaction; defined only if the log
+					is in a history list */
+#define TRX_UNDO_DEL_MARKS	16	/*!< Defined only in an update undo
+					log: TRUE if the transaction may have
+					done delete markings of records, and
+					thus purge is necessary */
+#define	TRX_UNDO_LOG_START	18	/*!< Offset of the first undo log record
+					of this log on the header page; purge
+					may remove undo log record from the
+					log start, and therefore this is not
+					necessarily the same as this log
+					header end offset */
+#define	TRX_UNDO_XID_EXISTS	20	/*!< TRUE if undo log header includes
+					X/Open XA transaction identification
+					XID */
+#define	TRX_UNDO_DICT_TRANS	21	/*!< TRUE if the transaction is a table
+					create, index create, or drop
+					transaction: in recovery
+					the transaction cannot be rolled back
+					in the usual way: a 'rollback' rather
+					means dropping the created or dropped
+					table, if it still exists */
+#define TRX_UNDO_TABLE_ID	22	/*!< Id of the table if the preceding
+					field is TRUE */
+#define	TRX_UNDO_NEXT_LOG	30	/*!< Offset of the next undo log header
+					on this page, 0 if none */
+#define	TRX_UNDO_PREV_LOG	32	/*!< Offset of the previous undo log
+					header on this page, 0 if none */
+#define TRX_UNDO_HISTORY_NODE	34	/*!< If the log is put to the history
+					list, the file list node is here */
+/*-------------------------------------------------------------*/
+/** Size of the undo log header without XID information */
+#define TRX_UNDO_LOG_OLD_HDR_SIZE (34 + FLST_NODE_SIZE)
+
+/* Note: the writing of the undo log old header is coded by a log record
+MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE. The appending of an XID to the
+header is logged separately. In this sense, the XID is not really a member
+of the undo log header. TODO: do not append the XID to the log header if XA
+is not needed by the user. The XID wastes about 150 bytes of space in every
+undo log. In the history list we may have millions of undo logs, which means
+quite a large overhead. */
+
+/** X/Open XA Transaction Identification (XID) */
+/* @{ */
+/** xid_t::formatID */
+#define	TRX_UNDO_XA_FORMAT	(TRX_UNDO_LOG_OLD_HDR_SIZE)
+/** xid_t::gtrid_length */
+#define	TRX_UNDO_XA_TRID_LEN	(TRX_UNDO_XA_FORMAT + 4)
+/** xid_t::bqual_length */
+#define	TRX_UNDO_XA_BQUAL_LEN	(TRX_UNDO_XA_TRID_LEN + 4)
+/** Distributed transaction identifier data */
+#define	TRX_UNDO_XA_XID		(TRX_UNDO_XA_BQUAL_LEN + 4)
+/*--------------------------------------------------------------*/
+#define TRX_UNDO_LOG_XA_HDR_SIZE (TRX_UNDO_XA_XID + XIDDATASIZE)
+					/*!< Total size of the undo log header
+					with the XA XID */
+/* @} */
+
+#ifndef UNIV_NONINL
+#include "trx0undo.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/trx0undo.ic b/storage/innobase/include/trx0undo.ic
new file mode 100644
index 00000000000..577759d6c3d
--- /dev/null
+++ b/storage/innobase/include/trx0undo.ic
@@ -0,0 +1,363 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0undo.ic
+Transaction undo log
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "data0type.h"
+#include "page0page.h"
+
+#ifndef UNIV_HOTBACKUP
+/***********************************************************************//**
+Builds a roll pointer by packing the four arguments into one integer.
+@return	roll pointer with all four fields ORed in */
+UNIV_INLINE
+roll_ptr_t
+trx_undo_build_roll_ptr(
+/*====================*/
+	ibool	is_insert,	/*!< in: TRUE if insert undo log */
+	ulint	rseg_id,	/*!< in: rollback segment id */
+	ulint	page_no,	/*!< in: page number */
+	ulint	offset)		/*!< in: offset of the undo entry within page */
+{
+	roll_ptr_t	roll_ptr;
+#if DATA_ROLL_PTR_LEN != 7
+# error "DATA_ROLL_PTR_LEN != 7"
+#endif
+	ut_ad(is_insert == 0 || is_insert == 1);
+	ut_ad(rseg_id < TRX_SYS_N_RSEGS);
+	ut_ad(offset < 65536);
+	/* NOTE(review): page_no is not range-checked here; confirm callers. */
+	roll_ptr = (roll_ptr_t) is_insert << 55	/* flag goes to bit 55 */
+		| (roll_ptr_t) rseg_id << 48	/* id starts at bit 48 */
+		| (roll_ptr_t) page_no << 16	/* page no starts at bit 16 */
+		| offset;			/* low bits; asserted < 65536 */
+	return(roll_ptr);
+}
+
+/***********************************************************************//**
+Decodes a roll pointer into its four fields, stored via the out pointers. */
+UNIV_INLINE
+void
+trx_undo_decode_roll_ptr(
+/*=====================*/
+	roll_ptr_t	roll_ptr,	/*!< in: roll pointer */
+	ibool*		is_insert,	/*!< out: TRUE if insert undo log */
+	ulint*		rseg_id,	/*!< out: rollback segment id */
+	ulint*		page_no,	/*!< out: page number */
+	ulint*		offset)		/*!< out: offset of the undo
+					entry within page */
+{
+#if DATA_ROLL_PTR_LEN != 7
+# error "DATA_ROLL_PTR_LEN != 7"
+#endif
+#if TRUE != 1
+# error "TRUE != 1"
+#endif
+	ut_ad(roll_ptr < (1ULL << 56));	/* assert: value fits in 56 bits */
+	*offset = (ulint) roll_ptr & 0xFFFF;	/* lowest 16 bits */
+	roll_ptr >>= 16;
+	*page_no = (ulint) roll_ptr & 0xFFFFFFFF;	/* next 32 bits */
+	roll_ptr >>= 32;
+	*rseg_id = (ulint) roll_ptr & 0x7F;	/* next 7 bits */
+	roll_ptr >>= 7;
+	*is_insert = (ibool) roll_ptr; /* what is left after 55 shifts; TRUE==1 */
+}
+
+/***********************************************************************//**
+Returns TRUE if the roll pointer is of the insert type, i.e. bit 55 is set.
+@return	TRUE if insert undo log */
+UNIV_INLINE
+ibool
+trx_undo_roll_ptr_is_insert(
+/*========================*/
+	roll_ptr_t	roll_ptr)	/*!< in: roll pointer */
+{
+#if DATA_ROLL_PTR_LEN != 7
+# error "DATA_ROLL_PTR_LEN != 7"
+#endif
+#if TRUE != 1
+# error "TRUE != 1"
+#endif
+	ut_ad(roll_ptr < (1ULL << 56));	/* so the shift below yields 0 or 1 */
+	return((ibool) (roll_ptr >> 55));
+}
+
+/***********************************************************************//**
+Returns true if the record is of the insert type (top bit of DB_ROLL_PTR).
+@return	true if the record was freshly inserted (not updated). */
+UNIV_INLINE
+bool
+trx_undo_trx_id_is_insert(
+/*======================*/
+	const byte*	trx_id)	/*!< in: DB_TRX_ID, followed by DB_ROLL_PTR */
+{
+#if DATA_TRX_ID + 1 != DATA_ROLL_PTR /* the two must be consecutive */
+# error
+#endif /* DATA_TRX_ID + 1 != DATA_ROLL_PTR */
+	return(static_cast<bool>(trx_id[DATA_TRX_ID_LEN] >> 7)); /* bit 7 */
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/*****************************************************************//**
+Writes a roll ptr to an index page using mach_write_to_7(). Call this
+wrapper rather than mach_write_...() directly, so that callers need
+not change if the roll ptr size changes in some future version. */
+UNIV_INLINE
+void
+trx_write_roll_ptr(
+/*===============*/
+	byte*		ptr,		/*!< in: address where the roll
+					ptr is stored */
+	roll_ptr_t	roll_ptr)	/*!< in: roll ptr */
+{
+#if DATA_ROLL_PTR_LEN != 7 /* keep in step with mach_write_to_7() below */
+# error "DATA_ROLL_PTR_LEN != 7"
+#endif
+	mach_write_to_7(ptr, roll_ptr);
+}
+
+/*****************************************************************//**
+Reads a roll ptr from an index page using mach_read_from_7(). Call this
+wrapper rather than mach_read_...() directly, so that callers need not
+change if the roll ptr size changes in some future version.
+@return	roll ptr */
+UNIV_INLINE
+roll_ptr_t
+trx_read_roll_ptr(
+/*==============*/
+	const byte*	ptr)	/*!< in: pointer to memory from where to read */
+{
+#if DATA_ROLL_PTR_LEN != 7 /* keep in step with mach_read_from_7() below */
+# error "DATA_ROLL_PTR_LEN != 7"
+#endif
+	return(mach_read_from_7(ptr));
+}
+
+#ifndef UNIV_HOTBACKUP
+/******************************************************************//**
+Gets an undo log page via buf_page_get(), requesting RW_X_LATCH on it.
+@return	pointer to page x-latched (the frame of the fetched block) */
+UNIV_INLINE
+page_t*
+trx_undo_page_get(
+/*==============*/
+	ulint	space,		/*!< in: space where placed */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	page_no,	/*!< in: page number */
+	mtr_t*	mtr)		/*!< in: mtr passed on to buf_page_get() */
+{
+	buf_block_t*	block = buf_page_get(space, zip_size, page_no,
+					     RW_X_LATCH, mtr);
+	buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
+	/* Callers work with the page frame rather than the block. */
+	return(buf_block_get_frame(block));
+}
+
+/******************************************************************//**
+Gets an undo log page via buf_page_get(), requesting RW_S_LATCH on it.
+@return	pointer to page s-latched (the frame of the fetched block) */
+UNIV_INLINE
+page_t*
+trx_undo_page_get_s_latched(
+/*========================*/
+	ulint	space,		/*!< in: space where placed */
+	ulint	zip_size,	/*!< in: compressed page size in bytes
+				or 0 for uncompressed pages */
+	ulint	page_no,	/*!< in: page number */
+	mtr_t*	mtr)		/*!< in: mtr passed on to buf_page_get() */
+{
+	buf_block_t*	block = buf_page_get(space, zip_size, page_no,
+					     RW_S_LATCH, mtr);
+	buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
+	/* Callers work with the page frame rather than the block. */
+	return(buf_block_get_frame(block));
+}
+
+/******************************************************************//**
+Returns the start offset of the undo log records of the specified undo
+log on the page. Only the page holding the log header stores this value.
+@return	start offset */
+UNIV_INLINE
+ulint
+trx_undo_page_get_start(
+/*====================*/
+	page_t*	undo_page,/*!< in: undo log page */
+	ulint	page_no,/*!< in: undo log header page number */
+	ulint	offset)	/*!< in: undo log header offset on page */
+{
+	ulint	start;
+
+	if (page_no == page_get_page_no(undo_page)) {
+		/* Header page: read TRX_UNDO_LOG_START from the log header. */
+		start = mach_read_from_2(offset + undo_page
+					 + TRX_UNDO_LOG_START);
+	} else {	/* any other page: page header offset plus its size */
+		start = TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE;
+	}
+
+	return(start);
+}
+
+/******************************************************************//**
+Returns the end offset of the undo log records of the specified undo
+log on the page: the next log header if there is one, else the free offset.
+@return	end offset */
+UNIV_INLINE
+ulint
+trx_undo_page_get_end(
+/*==================*/
+	page_t*	undo_page,/*!< in: undo log page */
+	ulint	page_no,/*!< in: undo log header page number */
+	ulint	offset)	/*!< in: undo log header offset on page */
+{
+	trx_ulogf_t*	log_hdr;
+	ulint		end;
+
+	if (page_no == page_get_page_no(undo_page)) {
+		/* undo_page holds the log header: locate it. */
+		log_hdr = undo_page + offset;
+		/* The next log header on this page, if any, is the end. */
+		end = mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG);
+		/* 0 means no next header: use the TRX_UNDO_PAGE_FREE field. */
+		if (end == 0) {
+			end = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
+					       + TRX_UNDO_PAGE_FREE);
+		}
+	} else {	/* not the header page: TRX_UNDO_PAGE_FREE is the end */
+		end = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
+				       + TRX_UNDO_PAGE_FREE);
+	}
+
+	return(end);
+}
+
+/******************************************************************//**
+Returns the previous undo record on the page in the specified log, or
+NULL if rec sits at the start offset of that log on the page.
+@return	pointer to record, NULL if none */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_page_get_prev_rec(
+/*=======================*/
+	trx_undo_rec_t*	rec,	/*!< in: undo log record */
+	ulint		page_no,/*!< in: undo log header page number */
+	ulint		offset)	/*!< in: undo log header offset on page */
+{
+	page_t*	undo_page;
+	ulint	start;
+	/* Align rec down to a UNIV_PAGE_SIZE boundary to find its page. */
+	undo_page = (page_t*) ut_align_down(rec, UNIV_PAGE_SIZE);
+
+	start = trx_undo_page_get_start(undo_page, page_no, offset);
+
+	if (start + undo_page == rec) {
+		/* rec is the first record of this log on the page. */
+		return(NULL);
+	}
+	/* The two bytes just before rec give the page offset to return. */
+	return(undo_page + mach_read_from_2(rec - 2));
+}
+
+/******************************************************************//**
+Returns the next undo log record on the page in the specified log, or
+NULL if the next offset stored in rec equals the end offset of the log.
+@return	pointer to record, NULL if none */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_page_get_next_rec(
+/*=======================*/
+	trx_undo_rec_t*	rec,	/*!< in: undo log record */
+	ulint		page_no,/*!< in: undo log header page number */
+	ulint		offset)	/*!< in: undo log header offset on page */
+{
+	page_t*	undo_page;
+	ulint	end;
+	ulint	next;
+	/* Align rec down to a UNIV_PAGE_SIZE boundary to find its page. */
+	undo_page = (page_t*) ut_align_down(rec, UNIV_PAGE_SIZE);
+
+	end = trx_undo_page_get_end(undo_page, page_no, offset);
+	/* The first two bytes of rec hold the page offset that follows it. */
+	next = mach_read_from_2(rec);
+
+	if (next == end) {
+		/* rec is the last record of this log on the page. */
+		return(NULL);
+	}
+
+	return(undo_page + next);
+}
+
+/******************************************************************//**
+Returns the last undo record on the page in the specified undo log, or
+NULL if the start and end offsets of the log on the page coincide.
+@return	pointer to record, NULL if none */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_page_get_last_rec(
+/*=======================*/
+	page_t*	undo_page,/*!< in: undo log page */
+	ulint	page_no,/*!< in: undo log header page number */
+	ulint	offset)	/*!< in: undo log header offset on page */
+{
+	ulint	start;
+	ulint	end;
+
+	start = trx_undo_page_get_start(undo_page, page_no, offset);
+	end = trx_undo_page_get_end(undo_page, page_no, offset);
+
+	if (start == end) {
+		/* The log has no records on this page. */
+		return(NULL);
+	}
+	/* The two bytes just before 'end' give the offset of the last rec. */
+	return(undo_page + mach_read_from_2(undo_page + end - 2));
+}
+
+/******************************************************************//**
+Returns the first undo record on the page in the specified undo log, or
+NULL if the start and end offsets of the log on the page coincide.
+@return	pointer to record, NULL if none */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_page_get_first_rec(
+/*========================*/
+	page_t*	undo_page,/*!< in: undo log page */
+	ulint	page_no,/*!< in: undo log header page number */
+	ulint	offset)	/*!< in: undo log header offset on page */
+{
+	ulint	start;
+	ulint	end;
+
+	start = trx_undo_page_get_start(undo_page, page_no, offset);
+	end = trx_undo_page_get_end(undo_page, page_no, offset);
+
+	if (start == end) {
+		/* The log has no records on this page. */
+		return(NULL);
+	}
+	/* The first record lies exactly at the start offset. */
+	return(undo_page + start);
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/trx0xa.h b/storage/innobase/include/trx0xa.h
new file mode 100644
index 00000000000..7caddfb7ba4
--- /dev/null
+++ b/storage/innobase/include/trx0xa.h
@@ -0,0 +1,70 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/*
+ * Start of xa.h header
+ *
+ * Define a symbol to prevent multiple inclusions of this header file
+ */
+#ifndef	XA_H
+#define	XA_H
+
+/*
+ * Transaction branch identification: XID and NULLXID:
+ */
+#ifndef XIDDATASIZE
+
+/** Sizes of transaction identifier */
+#define	XIDDATASIZE	128		/*!< maximum size of a transaction
+					identifier, in bytes */
+#define	MAXGTRIDSIZE	 64		/*!< maximum size in bytes of gtrid */
+#define	MAXBQUALSIZE	 64		/*!< maximum size in bytes of bqual */
+
+/** X/Open XA distributed transaction identifier (used if no XIDDATASIZE) */
+struct xid_t {
+	long formatID;			/*!< format identifier; -1
+					means that the XID is null */
+	long gtrid_length;		/*!< value from 1 through 64
+					(compare MAXGTRIDSIZE) */
+	long bqual_length;		/*!< 1 through 64 (MAXBQUALSIZE) */
+	char data[XIDDATASIZE];		/*!< distributed transaction id */
+};
+/** X/Open XA distributed transaction identifier */
+typedef	struct xid_t XID;
+#endif
+/** X/Open XA distributed transaction status codes */
+/* @{ */
+#define	XA_OK		0		/*!< normal execution */
+#define	XAER_ASYNC	-2		/*!< asynchronous operation already
+					outstanding */
+#define	XAER_RMERR	-3		/*!< a resource manager error
+					occurred in the transaction
+					branch */
+#define	XAER_NOTA	-4		/*!< the XID is not valid */
+#define	XAER_INVAL	-5		/*!< invalid arguments were given */
+#define	XAER_PROTO	-6		/*!< routine invoked in an improper
+					context */
+#define	XAER_RMFAIL	-7		/*!< resource manager unavailable */
+#define	XAER_DUPID	-8		/*!< the XID already exists */
+#define	XAER_OUTSIDE	-9		/*!< resource manager doing
+					work outside transaction */
+/* @} */
+#endif /* ifndef XA_H */
+/*
+ * End of xa.h header
+ */
diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
new file mode 100644
index 00000000000..8c325ecc88c
--- /dev/null
+++ b/storage/innobase/include/univ.i
@@ -0,0 +1,667 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/***********************************************************************//**
+@file include/univ.i
+Version control for database, common definitions, and include files
+
+Created 1/20/1994 Heikki Tuuri
+****************************************************************************/
+
+#ifndef univ_i
+#define univ_i
+
+#ifdef UNIV_HOTBACKUP
+#include "hb_univ.i"
+#endif /* UNIV_HOTBACKUP */
+
+/* aux macros to convert M into "123" (string) if M is defined like
+#define M 123 */
+#define _IB_TO_STR(s)	#s
+#define IB_TO_STR(s)	_IB_TO_STR(s)
+
+#define INNODB_VERSION_MAJOR	MYSQL_VERSION_MAJOR
+#define INNODB_VERSION_MINOR	MYSQL_VERSION_MINOR
+#define INNODB_VERSION_BUGFIX	MYSQL_VERSION_PATCH
+
+/* The following is the InnoDB version as shown in
+SELECT plugin_version FROM information_schema.plugins;
+calculated in make_version_string() in sql/sql_show.cc like this:
+"version >> 8" . "version & 0xff"
+because the version is shown with only one dot, we skip the last
+component, i.e. we show M.N.P as M.N */
+#define INNODB_VERSION_SHORT	\
+	(INNODB_VERSION_MAJOR << 8 | INNODB_VERSION_MINOR)
+
+#define INNODB_VERSION_STR			\
+	IB_TO_STR(INNODB_VERSION_MAJOR) "."	\
+	IB_TO_STR(INNODB_VERSION_MINOR) "."	\
+	IB_TO_STR(INNODB_VERSION_BUGFIX)
+
+#define REFMAN "http://dev.mysql.com/doc/refman/"	\
+	IB_TO_STR(MYSQL_VERSION_MAJOR) "."		\
+	IB_TO_STR(MYSQL_VERSION_MINOR) "/en/"
+
+#ifdef MYSQL_DYNAMIC_PLUGIN
+/* In the dynamic plugin, redefine some externally visible symbols
+in order not to conflict with the symbols of a builtin InnoDB. */
+
+/* Rename all C++ classes that contain virtual functions, because we
+have not figured out how to apply the visibility=hidden attribute to
+the virtual method table (vtable) in GCC 3. */
+# define ha_innobase ha_innodb
+#endif /* MYSQL_DYNAMIC_PLUGIN */
+
+#if (defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)) && !defined(MYSQL_SERVER) && !defined(__WIN__)
+# undef __WIN__
+# define __WIN__
+
+# include <windows.h>
+
+# ifdef _NT_
+#  define __NT__
+# endif
+
+#else
+/* The defines used with MySQL */
+
+/* Include two header files from MySQL to make the Unix flavor used
+in compiling more Posix-compatible. These headers also define __WIN__
+if we are compiling on Windows. */
+
+#ifndef UNIV_HOTBACKUP
+# include <my_global.h>
+# include <my_pthread.h>
+#endif /* UNIV_HOTBACKUP */
+
+/* Include <sys/stat.h> to get S_I... macros defined for os0file.cc */
+# include <sys/stat.h>
+# if !defined(__WIN__)
+#  include <sys/mman.h> /* mmap() for os0proc.cc */
+# endif
+
+/* Include the header file generated by GNU autoconf */
+# ifndef __WIN__
+#  ifndef UNIV_HOTBACKUP
+#   include "config.h"
+#  endif /* UNIV_HOTBACKUP */
+# endif
+
+# ifdef HAVE_SCHED_H
+#  include <sched.h>
+# endif
+
+/* We only try to do explicit inlining of functions with gcc and
+Sun Studio */
+
+# ifdef HAVE_PREAD
+#  define HAVE_PWRITE
+# endif
+
+#endif /* #if (defined(WIN32) || ... */
+
+#ifndef __WIN__
+#define __STDC_FORMAT_MACROS    /* Enable C99 printf format macros */
+#include <inttypes.h>
+#endif /* !__WIN__ */
+
+/* Following defines are to enable performance schema
+instrumentation in each of four InnoDB modules if
+HAVE_PSI_INTERFACE is defined. */
+#if defined HAVE_PSI_INTERFACE && !defined UNIV_HOTBACKUP
+# define UNIV_PFS_MUTEX
+# define UNIV_PFS_RWLOCK
+/* For I/O instrumentation, performance schema relies
+on a native descriptor to identify the file; this
+descriptor could conflict with our OS level descriptor.
+Disable I/O instrumentation on Windows until this is
+resolved. */
+# ifndef __WIN__
+#  define UNIV_PFS_IO
+# endif
+# define UNIV_PFS_THREAD
+
+/* There are mutexes/rwlocks that we want to exclude from
+instrumentation even if their corresponding performance schema
+define is set. And this PFS_NOT_INSTRUMENTED is used
+as the key value to identify those objects that would
+be excluded from instrumentation. */
+# define PFS_NOT_INSTRUMENTED		ULINT32_UNDEFINED
+
+# define PFS_IS_INSTRUMENTED(key)	((key) != PFS_NOT_INSTRUMENTED)
+
+#endif /* HAVE_PSI_INTERFACE */
+
+#ifdef __WIN__
+# define YY_NO_UNISTD_H 1
+#endif /* __WIN__ */
+
+/*			DEBUG VERSION CONTROL
+			===================== */
+
+/* When this macro is defined then additional test functions will be
+compiled. These functions live at the end of each relevant source file
+and have "test_" prefix. These functions are not called from anywhere in
+the code, they can be called from gdb after
+innobase_start_or_create_for_mysql() has executed using the call
+command. Not tested on Windows. */
+/*
+#define UNIV_COMPILE_TEST_FUNCS
+*/
+
+#if defined HAVE_VALGRIND
+# define UNIV_DEBUG_VALGRIND
+#endif /* HAVE_VALGRIND */
+#if 0
+#define UNIV_DEBUG_VALGRIND			/* Enable extra
+						Valgrind instrumentation */
+#define UNIV_DEBUG_PRINT			/* Enable the compilation of
+						some debug print functions */
+#define UNIV_AHI_DEBUG				/* Enable adaptive hash index
+						debugging without UNIV_DEBUG */
+#define UNIV_BUF_DEBUG				/* Enable buffer pool
+						debugging without UNIV_DEBUG */
+#define UNIV_BLOB_LIGHT_DEBUG			/* Enable off-page column
+						debugging without UNIV_DEBUG */
+#define UNIV_DEBUG				/* Enable ut_ad() assertions
+						and disable UNIV_INLINE */
+#define UNIV_DEBUG_LOCK_VALIDATE		/* Enable
+						ut_ad(lock_rec_validate_page())
+						assertions. */
+#define UNIV_DEBUG_FILE_ACCESSES		/* Enable freed block access
+						debugging without UNIV_DEBUG */
+#define UNIV_LRU_DEBUG				/* debug the buffer pool LRU */
+#define UNIV_HASH_DEBUG				/* debug HASH_ macros */
+#define UNIV_LIST_DEBUG				/* debug UT_LIST_ macros */
+#define UNIV_LOG_LSN_DEBUG			/* write LSN to the redo log;
+this will break redo log file compatibility, but it may be useful when
+debugging redo log application problems. */
+#define UNIV_MEM_DEBUG				/* detect memory leaks etc */
+#define UNIV_IBUF_DEBUG				/* debug the insert buffer */
+#define UNIV_BLOB_DEBUG				/* track BLOB ownership;
+assumes that no BLOBs survive server restart */
+#define UNIV_IBUF_COUNT_DEBUG			/* debug the insert buffer;
+this limits the database to IBUF_COUNT_N_SPACES and IBUF_COUNT_N_PAGES,
+and the insert buffer must be empty when the database is started */
+#define UNIV_PERF_DEBUG                         /* debug flag that enables
+                                                light weight performance
+                                                related stuff. */
+#define UNIV_SYNC_DEBUG				/* debug mutex and latch
+operations (very slow); also UNIV_DEBUG must be defined */
+#define UNIV_SEARCH_DEBUG			/* debug B-tree comparisons */
+#define UNIV_SYNC_PERF_STAT			/* operation counts for
+						rw-locks and mutexes */
+#define UNIV_SEARCH_PERF_STAT			/* statistics for the
+						adaptive hash index */
+#define UNIV_SRV_PRINT_LATCH_WAITS		/* enable diagnostic output
+						in sync0sync.cc */
+#define UNIV_BTR_PRINT				/* enable functions for
+						printing B-trees */
+#define UNIV_ZIP_DEBUG				/* extensive consistency checks
+						for compressed pages */
+#define UNIV_ZIP_COPY				/* call page_zip_copy_recs()
+						more often */
+#define UNIV_AIO_DEBUG				/* prints info about
+						submitted and reaped AIO
+						requests to the log. */
+#define UNIV_STATS_DEBUG			/* prints various stats
+						related debug info from
+						dict0stats.cc */
+#define FTS_INTERNAL_DIAG_PRINT                 /* FTS internal debugging
+                                                info output */
+#endif
+
+#define UNIV_BTR_DEBUG				/* check B-tree links */
+#define UNIV_LIGHT_MEM_DEBUG			/* light memory debugging */
+
+/*
+#define UNIV_SQL_DEBUG
+#define UNIV_LOG_DEBUG
+*/
+			/* the above option prevents forcing of log to disk
+			at a buffer page write: it should be tested with this
+			option off; also some ibuf tests are suppressed */
+
+/* Linkage specifier for non-static InnoDB symbols (variables and functions)
+that are only referenced from within InnoDB, not from MySQL. We disable the
+GCC visibility directive on all Sun operating systems because there is no
+easy way to get it to work. See http://bugs.mysql.com/bug.php?id=52263. */
+#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(sun) || defined(__INTEL_COMPILER)
+# define UNIV_INTERN __attribute__((visibility ("hidden")))
+#else
+# define UNIV_INTERN
+#endif
+#if defined(INNODB_COMPILER_HINTS)      \
+    && defined __GNUC__                 \
+    && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 3)
+/** Starting with GCC 4.3, the "cold" attribute is used to inform the
+compiler that a function is unlikely to be executed.  The function is
+optimized for size rather than speed and on many targets it is placed
+into a special subsection of the text section so all cold functions
+appear close together, improving code locality of non-cold parts of
+the program.  The paths leading to calls of cold functions within code
+are marked as unlikely by the branch prediction mechanism.  Use it to
+optimize a rarely invoked function for size instead of speed. */
+# define UNIV_COLD __attribute__((cold))
+#else
+# define UNIV_COLD /* empty */
+#endif
+
+#ifndef UNIV_MUST_NOT_INLINE
+/* Definition for inline version */
+
+#define UNIV_INLINE static inline
+
+#else /* !UNIV_MUST_NOT_INLINE */
+/* If we want to compile a noninlined version we use the following macro
+definitions: */
+
+#define UNIV_NONINL
+#define UNIV_INLINE	UNIV_INTERN
+
+#endif /* !UNIV_MUST_NOT_INLINE */
+
+#ifdef _WIN64	/* test first: _WIN32 is also defined on 64-bit Windows */
+#define UNIV_WORD_SIZE		8
+#elif defined(_WIN32)
+#define UNIV_WORD_SIZE		4
+#else
+/** MySQL config.h generated by GNU autoconf will define SIZEOF_LONG in Posix */
+#define UNIV_WORD_SIZE		SIZEOF_LONG
+#endif
+
+/** The following alignment is used in memory allocations in memory heap
+management to ensure correct alignment for doubles etc. */
+#define UNIV_MEM_ALIGNMENT	8
+
+/** The following alignment is used in aligning lints etc. */
+#define UNIV_WORD_ALIGNMENT	UNIV_WORD_SIZE
+
+/*
+			DATABASE VERSION CONTROL
+			========================
+*/
+
+/** There are currently two InnoDB file formats which are used to group
+features with similar restrictions and dependencies. Using an enum allows
+switch statements to give a compiler warning when a new format is unhandled. */
+enum innodb_file_formats_enum {
+	/** Antelope File Format: InnoDB/MySQL up to 5.1.
+	This format includes REDUNDANT and COMPACT row formats */
+	UNIV_FORMAT_A		= 0,	/* also used as UNIV_FORMAT_MIN */
+
+	/** Barracuda File Format: Introduced in InnoDB plugin for 5.1:
+	This format includes COMPRESSED and DYNAMIC row formats.  It
+	includes the ability to create secondary indexes from data that
+	is not on the clustered index page and the ability to store more
+	data off the clustered index page. */
+	UNIV_FORMAT_B		= 1	/* also used as UNIV_FORMAT_MAX */
+};
+
+typedef enum innodb_file_formats_enum innodb_file_formats_t;
+
+/** Minimum supported file format */
+#define UNIV_FORMAT_MIN		UNIV_FORMAT_A
+
+/** Maximum supported file format */
+#define UNIV_FORMAT_MAX		UNIV_FORMAT_B
+
+/** The 2-logarithm of UNIV_PAGE_SIZE: */
+#define UNIV_PAGE_SIZE_SHIFT	srv_page_size_shift
+
+/** The universal page size of the database */
+#define UNIV_PAGE_SIZE		((ulint) srv_page_size)
+
+/** log2 of smallest compressed page size (1<<10 == 1024 bytes)
+Note: This must never change! */
+#define UNIV_ZIP_SIZE_SHIFT_MIN		10
+
+/** log2 of largest compressed page size (1<<14 == 16384 bytes).
+A compressed page directory entry reserves 14 bits for the start offset
+and 2 bits for flags. This limits the uncompressed page size to 16k.
+Even though a 16k uncompressed page can theoretically be compressed
+into a larger compressed page, it is not a useful feature so we will
+limit both with this same constant. */
+#define UNIV_ZIP_SIZE_SHIFT_MAX		14
+
+/* Define the Min, Max, Default page sizes. */
+/** Minimum Page Size Shift (power of 2) */
+#define UNIV_PAGE_SIZE_SHIFT_MIN	12
+/** Maximum Page Size Shift (power of 2) */
+#define UNIV_PAGE_SIZE_SHIFT_MAX	14
+/** Default Page Size Shift (power of 2) */
+#define UNIV_PAGE_SIZE_SHIFT_DEF	14
+/** Original 16k InnoDB Page Size Shift, in case the default changes */
+#define UNIV_PAGE_SIZE_SHIFT_ORIG	14
+
+/** Minimum page size InnoDB currently supports. */
+#define UNIV_PAGE_SIZE_MIN	(1 << UNIV_PAGE_SIZE_SHIFT_MIN)
+/** Maximum page size InnoDB currently supports. */
+#define UNIV_PAGE_SIZE_MAX	(1 << UNIV_PAGE_SIZE_SHIFT_MAX)
+/** Default page size for InnoDB tablespaces. */
+#define UNIV_PAGE_SIZE_DEF	(1 << UNIV_PAGE_SIZE_SHIFT_DEF)
+/** Original 16k page size for InnoDB tablespaces. */
+#define UNIV_PAGE_SIZE_ORIG	(1 << UNIV_PAGE_SIZE_SHIFT_ORIG)
+
+/** Smallest compressed page size */
+#define UNIV_ZIP_SIZE_MIN	(1 << UNIV_ZIP_SIZE_SHIFT_MIN)
+
+/** Largest compressed page size */
+#define UNIV_ZIP_SIZE_MAX	(1 << UNIV_ZIP_SIZE_SHIFT_MAX)
+
+/** Number of supported page sizes (The convention 'ssize' is used
+for 'log2 minus 9' or the number of shifts starting with 512.)
+This number varies depending on UNIV_PAGE_SIZE. */
+#define UNIV_PAGE_SSIZE_MAX					\
+	(UNIV_PAGE_SIZE_SHIFT - UNIV_ZIP_SIZE_SHIFT_MIN + 1)
+
+/** Maximum number of parallel threads in a parallelized operation */
+#define UNIV_MAX_PARALLELISM	32
+
+/** This is the "mbmaxlen" for my_charset_filename (defined in
+strings/ctype-utf8.c), which is used to encode File and Database names. */
+#define FILENAME_CHARSET_MAXNAMLEN	5
+
+/** The maximum length of an encoded table name in bytes.  The max
+table and database names are NAME_CHAR_LEN (64) characters. After the
+encoding, the max length would be NAME_CHAR_LEN (64) *
+FILENAME_CHARSET_MAXNAMLEN (5) = 320 bytes. The number does not include a
+terminating '\0'. InnoDB can handle longer names internally. */
+#define MAX_TABLE_NAME_LEN	320
+
+/** The maximum length of a database name. Like MAX_TABLE_NAME_LEN this is
+the MySQL's NAME_LEN, see check_and_convert_db_name(). */
+#define MAX_DATABASE_NAME_LEN	MAX_TABLE_NAME_LEN
+
+/** MAX_FULL_NAME_LEN defines the full name path including the
+database name and table name. In addition, 14 bytes are added for:
+	2 for surrounding quotes around table name
+	1 for the separating dot (.)
+	9 for the #mysql50# prefix (only 12 of the 14 are itemized here) */
+#define MAX_FULL_NAME_LEN				\
+	(MAX_TABLE_NAME_LEN + MAX_DATABASE_NAME_LEN + 14)
+
+/** The maximum length in bytes that a database name can occupy when stored in
+UTF8, including the terminating '\0', see dict_fs2utf8(). You must include
+mysql_com.h if you are to use this macro. */
+#define MAX_DB_UTF8_LEN		(NAME_LEN + 1)
+
+/** The maximum length in bytes that a table name can occupy when stored in
+UTF8, including the terminating '\0', see dict_fs2utf8(). You must include
+mysql_com.h if you are to use this macro. */
+#define MAX_TABLE_UTF8_LEN	(NAME_LEN + sizeof(srv_mysql50_table_name_prefix))
+
+/*
+			UNIVERSAL TYPE DEFINITIONS
+			==========================
+*/
+
+/* Note that inside MySQL 'byte' is defined as char on Linux! */
+#define byte			unsigned char
+
+/* Another basic type we use is unsigned long integer which should be equal to
+the word size of the machine, that is on a 32-bit platform 32 bits, and on a
+64-bit platform 64 bits. We also give the printf format for the type as a
+macro ULINTPF. */
+
+
+#ifdef __WIN__
+/* Use the integer types and formatting strings defined in Visual Studio. */
+# define UINT32PF	"%I32u"
+# define INT64PF	"%I64d"
+# define UINT64PF	"%I64u"
+# define UINT64PFx	"%016I64x"
+# define DBUG_LSN_PF    "%llu"
+typedef __int64 ib_int64_t;
+typedef unsigned __int64 ib_uint64_t;
+typedef unsigned __int32 ib_uint32_t;
+#else
+/* Use the integer types and formatting strings defined in the C99 standard. */
+# define UINT32PF	"%" PRIu32
+# define INT64PF	"%" PRId64
+# define UINT64PF	"%" PRIu64
+# define UINT64PFx	"%016" PRIx64
+# define DBUG_LSN_PF    UINT64PF
+typedef int64_t ib_int64_t;
+typedef uint64_t ib_uint64_t;
+typedef uint32_t ib_uint32_t;
+# endif /* __WIN__ */
+
+# define IB_ID_FMT	UINT64PF
+
+#ifdef _WIN64
+typedef unsigned __int64	ulint;
+typedef __int64			lint;
+# define ULINTPF		UINT64PF
+#else
+typedef unsigned long int	ulint;
+typedef long int		lint;
+# define ULINTPF		"%lu"
+#endif /* _WIN64 */
+
+#ifndef UNIV_HOTBACKUP
+typedef unsigned long long int	ullint;
+#endif /* UNIV_HOTBACKUP */
+
+#ifndef __WIN__
+#if SIZEOF_LONG != SIZEOF_VOIDP
+#error "Error: InnoDB's ulint must be of the same size as void*"
+#endif
+#endif
+
+/** The 'undefined' value for a ulint */
+#define ULINT_UNDEFINED		((ulint)(-1))
+
+#define ULONG_UNDEFINED		((ulong)(-1))
+
+/** The 'undefined' value for a ib_uint64_t */
+#define UINT64_UNDEFINED	((ib_uint64_t)(-1))
+
+/** The bitmask of 32-bit unsigned integer */
+#define ULINT32_MASK		0xFFFFFFFF
+/** The undefined 32-bit unsigned integer */
+#define	ULINT32_UNDEFINED	ULINT32_MASK
+
+/** Maximum value for a ulint; (ulint)(-1) is taken by ULINT_UNDEFINED */
+#define ULINT_MAX		((ulint)(-2))
+
+/** Maximum value for ib_uint64_t */
+#define IB_UINT64_MAX		((ib_uint64_t) (~0ULL))
+
+/** The generic InnoDB system object identifier data type */
+typedef ib_uint64_t		ib_id_t;
+#define IB_ID_MAX		IB_UINT64_MAX
+
+/** The 'undefined' value for a ullint */
+#define ULLINT_UNDEFINED        ((ullint)(-1))
+
+/** This 'ibool' type is used within Innobase. Remember that different included
+headers may define 'bool' differently. Do not assume that 'bool' is a ulint! */
+#define ibool			ulint
+
+#ifndef TRUE
+
+#define TRUE    1
+#define FALSE   0
+
+#endif
+
+#define UNIV_NOTHROW
+
+/** When used as the length of a logical field, this value means that the
+field holds the SQL NULL. NOTE: a field length is assumed to be a 32-bit
+integer when it is stored (for example, in an undo log on disk), so this
+value must fit in 32 bits on every platform, including 64-bit ones; hence
+it is defined as ULINT32_UNDEFINED. */
+
+#define UNIV_SQL_NULL ULINT32_UNDEFINED
+
+/** A length that is not UNIV_SQL_NULL but is bigger than the following
+number indicates that the field contains a reference to an externally
+stored part of the field in the tablespace. The length field then
+contains the sum of the following flag and the locally stored length. */
+
+#define UNIV_EXTERN_STORAGE_FIELD (UNIV_SQL_NULL - UNIV_PAGE_SIZE_MAX)
+
+#if defined(__GNUC__) && (__GNUC__ > 2) && ! defined(__INTEL_COMPILER)
+#define HAVE_GCC_GT_2
+/* Tell the compiler that variable/function is unused. */
+# define UNIV_UNUSED    __attribute__ ((unused))
+#else
+# define UNIV_UNUSED
+#endif /* CHECK FOR GCC VER_GT_2 */
+
+/* Some macros to improve branch prediction and reduce cache misses */
+#if defined(INNODB_COMPILER_HINTS) && defined(HAVE_GCC_GT_2)
+/* Tell the compiler that 'expr' probably evaluates to 'constant'. */
+# define UNIV_EXPECT(expr,constant) __builtin_expect(expr, constant)
+/* Tell the compiler that a pointer is likely to be NULL; the cast covers all of (ptr) */
+# define UNIV_LIKELY_NULL(ptr) __builtin_expect((ulint) (ptr), 0)
+/* Minimize cache-miss latency by moving data at addr into a cache before
+it is read. */
+# define UNIV_PREFETCH_R(addr) __builtin_prefetch(addr, 0, 3)
+/* Minimize cache-miss latency by moving data at addr into a cache before
+it is read or written. */
+# define UNIV_PREFETCH_RW(addr) __builtin_prefetch(addr, 1, 3)
+
+/* Sun Studio includes sun_prefetch.h as of version 5.9 */
+#elif (defined(__SUNPRO_C) && __SUNPRO_C >= 0x590) \
+       || (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x590)
+
+# include <sun_prefetch.h>
+
+#if __SUNPRO_C >= 0x550
+# undef UNIV_INTERN
+# define UNIV_INTERN __hidden
+#endif /* __SUNPRO_C >= 0x550 */
+
+# define UNIV_EXPECT(expr,value) (expr)
+# define UNIV_LIKELY_NULL(expr) (expr)
+
+# if defined(INNODB_COMPILER_HINTS)
+//# define UNIV_PREFETCH_R(addr) sun_prefetch_read_many((void*) addr)
+#  define UNIV_PREFETCH_R(addr) ((void) 0)
+#  define UNIV_PREFETCH_RW(addr) sun_prefetch_write_many(addr)
+# else
+#  define UNIV_PREFETCH_R(addr) ((void) 0)
+#  define UNIV_PREFETCH_RW(addr) ((void) 0)
+# endif /* INNODB_COMPILER_HINTS */
+
+#else
+/* Dummy versions of the macros */
+# define UNIV_EXPECT(expr,value) (expr)
+# define UNIV_LIKELY_NULL(expr) (expr)
+# define UNIV_PREFETCH_R(addr) ((void) 0)
+# define UNIV_PREFETCH_RW(addr) ((void) 0)
+#endif
+
+/* Hint that cond is expected to equal TRUE; expands through UNIV_EXPECT */
+#define UNIV_LIKELY(cond) UNIV_EXPECT(cond, TRUE)
+/* Hint that cond is expected to equal FALSE; expands through UNIV_EXPECT */
+#define UNIV_UNLIKELY(cond) UNIV_EXPECT(cond, FALSE)
+
+/* Element count of array a as a compile-time constant; a must be a real array, not a pointer. */
+#define UT_ARR_SIZE(a) (sizeof(a) / sizeof((a)[0]))
+
+/* The return type from a thread's start function differs between Unix and
+Windows, so define a typedef for it and a macro to use at the end of such
+functions. */
+
+#ifdef __WIN__
+typedef ulint os_thread_ret_t;
+#define OS_THREAD_DUMMY_RETURN return(0)
+#else
+typedef void* os_thread_ret_t;
+#define OS_THREAD_DUMMY_RETURN return(NULL)
+#endif
+
+#include <stdio.h>
+#include "ut0dbg.h"
+#include "ut0ut.h"
+#include "db0err.h"
+#ifdef UNIV_DEBUG_VALGRIND
+# include <valgrind/memcheck.h>
+# define UNIV_MEM_VALID(addr, size) VALGRIND_MAKE_MEM_DEFINED(addr, size)
+# define UNIV_MEM_INVALID(addr, size) VALGRIND_MAKE_MEM_UNDEFINED(addr, size)
+# define UNIV_MEM_FREE(addr, size) VALGRIND_MAKE_MEM_NOACCESS(addr, size)
+# define UNIV_MEM_ALLOC(addr, size) VALGRIND_MAKE_MEM_UNDEFINED(addr, size)
+# define UNIV_MEM_DESC(addr, size) VALGRIND_CREATE_BLOCK(addr, size, #addr)
+# define UNIV_MEM_UNDESC(b) VALGRIND_DISCARD(b)
+# define UNIV_MEM_ASSERT_RW_LOW(addr, size, should_abort) do {		\
+	const void* _p = (const void*) (ulint)				\
+		VALGRIND_CHECK_MEM_IS_DEFINED(addr, size);		\
+	if (UNIV_LIKELY_NULL(_p)) {					\
+		fprintf(stderr, "%s:%d: %p[%u] undefined at %ld\n",	\
+			__FILE__, __LINE__,				\
+			(const void*) (addr), (unsigned) (size), (long)	\
+			(((const char*) _p) - ((const char*) (addr))));	\
+		if (should_abort) {					\
+			ut_error;					\
+		}							\
+	}								\
+} while (0)
+# define UNIV_MEM_ASSERT_RW(addr, size)					\
+	UNIV_MEM_ASSERT_RW_LOW(addr, size, false)
+# define UNIV_MEM_ASSERT_RW_ABORT(addr, size)				\
+	UNIV_MEM_ASSERT_RW_LOW(addr, size, true)
+# define UNIV_MEM_ASSERT_W(addr, size) do {				\
+	const void* _p = (const void*) (ulint)				\
+		VALGRIND_CHECK_MEM_IS_ADDRESSABLE(addr, size);		\
+	if (UNIV_LIKELY_NULL(_p))					\
+		fprintf(stderr, "%s:%d: %p[%u] unwritable at %ld\n",	\
+			__FILE__, __LINE__,				\
+			(const void*) (addr), (unsigned) (size), (long)	\
+			(((const char*) _p) - ((const char*) (addr))));	\
+	} while (0)
+# define UNIV_MEM_TRASH(addr, c, size) do {				\
+	ut_d(memset(addr, c, size));					\
+	UNIV_MEM_INVALID(addr, size);					\
+	} while (0)
+#else
+# define UNIV_MEM_VALID(addr, size) do {} while(0)
+# define UNIV_MEM_INVALID(addr, size) do {} while(0)
+# define UNIV_MEM_FREE(addr, size) do {} while(0)
+# define UNIV_MEM_ALLOC(addr, size) do {} while(0)
+# define UNIV_MEM_DESC(addr, size) do {} while(0)
+# define UNIV_MEM_UNDESC(b) do {} while(0)
+# define UNIV_MEM_ASSERT_RW_LOW(addr, size, should_abort) do {} while(0)
+# define UNIV_MEM_ASSERT_RW(addr, size) do {} while(0)
+# define UNIV_MEM_ASSERT_RW_ABORT(addr, size) do {} while(0)
+# define UNIV_MEM_ASSERT_W(addr, size) do {} while(0)
+# define UNIV_MEM_TRASH(addr, c, size) do {} while(0)
+#endif
+#define UNIV_MEM_ASSERT_AND_FREE(addr, size) do {	\
+	UNIV_MEM_ASSERT_W(addr, size);			\
+	UNIV_MEM_FREE(addr, size);			\
+} while (0)
+#define UNIV_MEM_ASSERT_AND_ALLOC(addr, size) do {	\
+	UNIV_MEM_ASSERT_W(addr, size);			\
+	UNIV_MEM_ALLOC(addr, size);			\
+} while (0)
+
+extern ulong	srv_page_size_shift;
+extern ulong	srv_page_size;
+
+#endif
diff --git a/storage/innobase/include/usr0sess.h b/storage/innobase/include/usr0sess.h
new file mode 100644
index 00000000000..b5c80b97b43
--- /dev/null
+++ b/storage/innobase/include/usr0sess.h
@@ -0,0 +1,77 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/usr0sess.h
+Sessions
+
+Created 6/25/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef usr0sess_h
+#define usr0sess_h
+
+#include "univ.i"
+#include "ut0byte.h"
+#include "trx0types.h"
+#include "srv0srv.h"
+#include "trx0types.h"
+#include "usr0types.h"
+#include "que0types.h"
+#include "data0data.h"
+#include "rem0rec.h"
+
+/*********************************************************************//**
+Opens a session.
+@return	own: session object */
+UNIV_INTERN
+sess_t*
+sess_open(void);
+/*============*/
+/*********************************************************************//**
+Closes a session and frees the memory occupied by it. */
+UNIV_INTERN
+void
+sess_close(
+/*=======*/
+	sess_t*		sess);		/*!< in, own: session object to close */
+
+/** The session handle. This data structure is only used by purge and is
+not really necessary; it is a candidate for removal. */
+struct sess_t{
+	ulint		state;		/*!< session state (presumably a SESS_* value) */
+	trx_t*		trx;		/*!< transaction object permanently
+					assigned to the session: the
+					transaction instance designated by the
+					trx id changes, but the memory
+					structure is preserved */
+	UT_LIST_BASE_NODE_T(que_t)
+			graphs;		/*!< base node of the list of query
+					graphs belonging to this session */
+};
+
+/* Session states */
+#define SESS_ACTIVE		1
+#define SESS_ERROR		2	/* session contains an error message
+					which has not yet been communicated
+					to the client */
+#ifndef UNIV_NONINL
+#include "usr0sess.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/usr0sess.ic b/storage/innobase/include/usr0sess.ic
new file mode 100644
index 00000000000..284e59537fe
--- /dev/null
+++ b/storage/innobase/include/usr0sess.ic
@@ -0,0 +1,24 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/usr0sess.ic
+Sessions
+
+Created 6/25/1996 Heikki Tuuri
+*******************************************************/
diff --git a/storage/innobase/include/usr0types.h b/storage/innobase/include/usr0types.h
new file mode 100644
index 00000000000..6ba937cacc8
--- /dev/null
+++ b/storage/innobase/include/usr0types.h
@@ -0,0 +1,31 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/usr0types.h
+Users and sessions global types
+
+Created 6/25/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef usr0types_h
+#define usr0types_h
+
+struct sess_t;	/* forward declaration; the struct is defined in usr0sess.h */
+
+#endif
diff --git a/storage/innobase/include/ut0bh.h b/storage/innobase/include/ut0bh.h
new file mode 100644
index 00000000000..1085736c7ab
--- /dev/null
+++ b/storage/innobase/include/ut0bh.h
@@ -0,0 +1,152 @@
+/***************************************************************************//**
+
+Copyright (c) 2011, 2013, Oracle Corpn. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/ut0bh.h
+Binary min-heap interface.
+
+Created 2010-05-28 by Sunny Bains
+*******************************************************/
+
+#ifndef INNOBASE_UT0BH_H
+#define INNOBASE_UT0BH_H
+
+#include "univ.i"
+
+/** Comparison function for objects in the binary heap. */
+typedef int (*ib_bh_cmp_t)(const void* p1, const void* p2);
+
+struct ib_bh_t;
+
+/**********************************************************************//**
+Get the number of elements in the binary heap.
+@return number of elements */
+UNIV_INLINE
+ulint
+ib_bh_size(
+/*=======*/
+	const ib_bh_t*	ib_bh);			/*!< in: instance */
+
+/**********************************************************************//**
+Test if binary heap is empty.
+@return TRUE if empty. */
+UNIV_INLINE
+ibool
+ib_bh_is_empty(
+/*===========*/
+	const ib_bh_t*	ib_bh);			/*!< in: instance */
+
+/**********************************************************************//**
+Test if binary heap is full.
+@return TRUE if full. */
+UNIV_INLINE
+ibool
+ib_bh_is_full(
+/*===========*/
+	const ib_bh_t*	ib_bh);			/*!< in: instance */
+
+/**********************************************************************//**
+Get a pointer to the element stored at position i.
+@return pointer to the element inside the heap's own storage */
+UNIV_INLINE
+void*
+ib_bh_get(
+/*=======*/
+	ib_bh_t*	ib_bh,			/*!< in: instance */
+	ulint		i);			/*!< in: index, asserted < ib_bh_size() */
+
+/**********************************************************************//**
+Copy an element (sizeof_elem bytes) into position i of the binary heap.
+@return pointer to the copy inside the heap */
+UNIV_INLINE
+void*
+ib_bh_set(
+/*======*/
+	ib_bh_t*	ib_bh,			/*!< in/out: instance */
+	ulint		i,			/*!< in: index, asserted < ib_bh_size() */
+	const void*	elem);			/*!< in: element to copy from */
+
+/**********************************************************************//**
+Return the first element from the binary heap.
+@return pointer to first element or NULL if empty. */
+UNIV_INLINE
+void*
+ib_bh_first(
+/*========*/
+	ib_bh_t*	ib_bh);			/*!< in: instance */
+
+/**********************************************************************//**
+Return the last element (index ib_bh_size() - 1) from the binary heap.
+@return pointer to last element or NULL if empty. */
+UNIV_INLINE
+void*
+ib_bh_last(
+/*========*/
+	ib_bh_t*	ib_bh);			/*!< in: instance */
+
+/**********************************************************************//**
+Create a binary heap.
+@return a new binary heap */
+UNIV_INTERN
+ib_bh_t*
+ib_bh_create(
+/*=========*/
+	ib_bh_cmp_t	compare,		/*!< in: comparator */
+	ulint		sizeof_elem,		/*!< in: size of one element */
+	ulint		max_elems);		/*!< in: max elements allowed */
+
+/**********************************************************************//**
+Free a binary heap.
+Takes ownership of the instance (see 'own' below); returns nothing. */
+UNIV_INTERN
+void
+ib_bh_free(
+/*=======*/
+	ib_bh_t*	ib_bh);			/*!< in,own: instance */
+
+/**********************************************************************//**
+Add an element to the binary heap. Note: The element is copied.
+@return pointer to added element or NULL if full. */
+UNIV_INTERN
+void*
+ib_bh_push(
+/*=======*/
+	ib_bh_t*	ib_bh,			/*!< in/out: instance */
+	const void*	elem);			/*!< in: element to add */
+
+/**********************************************************************//**
+Remove the first element from the binary heap. */
+UNIV_INTERN
+void
+ib_bh_pop(
+/*======*/
+	ib_bh_t*	ib_bh);			/*!< in/out: instance */
+
+/** Binary heap header; ib_bh_get() locates the elements right after it */
+struct ib_bh_t {
+	ulint		max_elems;		/*!< max elements allowed (capacity) */
+	ulint		n_elems;		/*!< number of elements currently stored */
+	ulint		sizeof_elem;		/*!< size of one element, in bytes */
+	ib_bh_cmp_t	compare;		/*!< comparator for two elements */
+};
+
+#ifndef UNIV_NONINL
+#include "ut0bh.ic"
+#endif
+
+#endif /* INNOBASE_UT0BH_H */
diff --git a/storage/innobase/include/ut0bh.ic b/storage/innobase/include/ut0bh.ic
new file mode 100644
index 00000000000..b11de5b8b3e
--- /dev/null
+++ b/storage/innobase/include/ut0bh.ic
@@ -0,0 +1,125 @@
+/***************************************************************************//**
+
+Copyright (c) 2011, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/ut0bh.ic
+Binary min-heap implementation.
+
+Created 2011-01-15 by Sunny Bains
+*******************************************************/
+
+#include "ut0bh.h"
+#include "ut0mem.h"	/* For ut_memcpy() */
+
+/**********************************************************************//**
+Get the number of elements currently stored in the binary heap.
+@return the heap's n_elems counter */
+UNIV_INLINE
+ulint
+ib_bh_size(
+/*=======*/
+	const ib_bh_t*	ib_bh)			/*!< in: instance */
+{
+	return(ib_bh->n_elems);
+}
+
+/**********************************************************************//**
+Check whether the binary heap currently holds no elements.
+@return TRUE if the element count is zero, FALSE otherwise */
+UNIV_INLINE
+ibool
+ib_bh_is_empty(
+/*===========*/
+	const ib_bh_t*	ib_bh)			/*!< in: heap to inspect */
+{
+	return(!ib_bh_size(ib_bh));
+}
+
+/**********************************************************************//**
+Check whether the binary heap has reached its capacity.
+@return TRUE if the element count is at least max_elems */
+UNIV_INLINE
+ibool
+ib_bh_is_full(
+/*===========*/
+	const ib_bh_t*	ib_bh)			/*!< in: heap to inspect */
+{
+	return(ib_bh->max_elems <= ib_bh_size(ib_bh));
+}
+
+/**********************************************************************//**
+Locate the element stored at a given position of the binary heap.
+@return pointer to the i'th element */
+UNIV_INLINE
+void*
+ib_bh_get(
+/*=======*/
+	ib_bh_t*	ib_bh,			/*!< in: instance */
+	ulint		i)			/*!< in: index, must be < size */
+{
+	ut_a(i < ib_bh_size(ib_bh));
+
+	/* The element array starts directly after the heap header. */
+	byte* const	elems = (byte*) (ib_bh + 1);
+	return(elems + (i * ib_bh->sizeof_elem));
+}
+
+/**********************************************************************//**
+Overwrite the element at a given position with a copy of elem.
+@return pointer to the copy stored in the heap */
+UNIV_INLINE
+void*
+ib_bh_set(
+/*======*/
+	ib_bh_t*	ib_bh,			/*!< in/out: instance */
+	ulint		i,			/*!< in: index */
+	const void*	elem)			/*!< in: element to copy from */
+{
+	void* const	slot = ib_bh_get(ib_bh, i);
+	const ulint	n_bytes = ib_bh->sizeof_elem;
+
+	ut_memcpy(slot, elem, n_bytes);
+	return(slot);
+}
+
+/**********************************************************************//**
+Peek at the element stored at index 0 of the binary heap.
+@return pointer to the first element, or NULL if the heap is empty */
+UNIV_INLINE
+void*
+ib_bh_first(
+/*========*/
+	ib_bh_t*	ib_bh)			/*!< in: instance */
+{
+	return(ib_bh_size(ib_bh) > 0 ? ib_bh_get(ib_bh, 0) : NULL);
+}
+
+/**********************************************************************//**
+Peek at the element stored at the highest occupied index of the heap.
+@return pointer to the last element, or NULL if the heap is empty */
+UNIV_INLINE
+void*
+ib_bh_last(
+/*========*/
+	ib_bh_t*	ib_bh)			/*!< in: instance */
+{
+	const ulint	n = ib_bh_size(ib_bh);
+
+	return(n == 0 ? NULL : ib_bh_get(ib_bh, n - 1));
+}
+
diff --git a/storage/innobase/include/ut0byte.h b/storage/innobase/include/ut0byte.h
new file mode 100644
index 00000000000..5bdd553ca80
--- /dev/null
+++ b/storage/innobase/include/ut0byte.h
@@ -0,0 +1,119 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/ut0byte.h
+Utilities for byte operations
+
+Created 1/20/1994 Heikki Tuuri
+***********************************************************************/
+
+#ifndef ut0byte_h
+#define ut0byte_h
+
+
+
+#include "univ.i"
+
+/*******************************************************//**
+Creates a 64-bit integer out of two 32-bit integers.
+@return	created integer */
+UNIV_INLINE
+ib_uint64_t
+ut_ull_create(
+/*==========*/
+	ulint	high,	/*!< in: high-order 32 bits */
+	ulint	low)	/*!< in: low-order 32 bits */
+	__attribute__((const));
+
+/********************************************************//**
+Rounds a 64-bit integer downward to a multiple of a power of 2.
+@return	rounded value */
+UNIV_INLINE
+ib_uint64_t
+ut_uint64_align_down(
+/*=================*/
+	ib_uint64_t	 n,		/*!< in: number to be rounded */
+	ulint		 align_no);	/*!< in: align by this number
+					which must be a power of 2 */
+/********************************************************//**
+Rounds ib_uint64_t upward to a multiple of a power of 2.
+@return	rounded value */
+UNIV_INLINE
+ib_uint64_t
+ut_uint64_align_up(
+/*===============*/
+	ib_uint64_t	 n,		/*!< in: number to be rounded */
+	ulint		 align_no);	/*!< in: align by this number
+					which must be a power of 2 */
+/*********************************************************//**
+Rounds a pointer up to the nearest address that is a multiple of align_no.
+@return	aligned pointer (ptr itself if it is already aligned) */
+UNIV_INLINE
+void*
+ut_align(
+/*=====*/
+	const void*	ptr,		/*!< in: pointer, asserted non-NULL */
+	ulint		align_no);	/*!< in: alignment, a power of 2 */
+/*********************************************************//**
+Rounds a pointer down to the nearest address that is a multiple of
+align_no.
+@return	aligned pointer (ptr itself if it is already aligned) */
+UNIV_INLINE
+void*
+ut_align_down(
+/*==========*/
+	const void*	ptr,		/*!< in: pointer, asserted non-NULL */
+	ulint		align_no)	/*!< in: alignment, a power of 2 */
+		__attribute__((const));
+/*********************************************************//**
+Computes how far a pointer lies above the nearest lower address that is
+a multiple of align_no.
+@return	distance from aligned pointer, in the range 0 .. align_no - 1 */
+UNIV_INLINE
+ulint
+ut_align_offset(
+/*============*/
+	const void*	ptr,		/*!< in: pointer, asserted non-NULL */
+	ulint		align_no)	/*!< in: alignment, a power of 2 */
+			__attribute__((const));
+/*****************************************************************//**
+Gets the nth bit of a ulint.
+@return	TRUE if nth bit is 1; 0th bit is defined to be the least significant */
+UNIV_INLINE
+ibool
+ut_bit_get_nth(
+/*===========*/
+	ulint	a,	/*!< in: ulint */
+	ulint	n);	/*!< in: nth bit requested */
+/*****************************************************************//**
+Sets the nth bit of a ulint.
+@return	the ulint with the bit set as requested */
+UNIV_INLINE
+ulint
+ut_bit_set_nth(
+/*===========*/
+	ulint	a,	/*!< in: ulint */
+	ulint	n,	/*!< in: nth bit requested */
+	ibool	val);	/*!< in: value for the bit to set */
+
+#ifndef UNIV_NONINL
+#include "ut0byte.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/ut0byte.ic b/storage/innobase/include/ut0byte.ic
new file mode 100644
index 00000000000..873d98c727e
--- /dev/null
+++ b/storage/innobase/include/ut0byte.ic
@@ -0,0 +1,173 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************************//**
+@file include/ut0byte.ic
+Utilities for byte operations
+
+Created 5/30/1994 Heikki Tuuri
+*******************************************************************/
+
+/*******************************************************//**
+Combines two 32-bit halves into a single 64-bit integer.
+@return	(high << 32) | low */
+UNIV_INLINE
+ib_uint64_t
+ut_ull_create(
+/*==========*/
+	ulint	high,	/*!< in: high-order 32 bits */
+	ulint	low)	/*!< in: low-order 32 bits */
+{
+	ut_ad(high <= ULINT32_MASK);
+	ut_ad(low <= ULINT32_MASK);
+	return((((ib_uint64_t) high) << 32) | (ib_uint64_t) low);
+}
+
+/********************************************************//**
+Clears the low bits of a 64-bit integer so that it becomes a multiple
+of align_no. @return n rounded downward */
+UNIV_INLINE
+ib_uint64_t
+ut_uint64_align_down(
+/*=================*/
+	ib_uint64_t	 n,		/*!< in: number to be rounded */
+	ulint		 align_no)	/*!< in: align by this number
+					which must be a power of 2 */
+{
+	ut_ad(align_no > 0);
+	ut_ad(ut_is_2pow(align_no));
+	const ib_uint64_t	low_bits = (ib_uint64_t) align_no - 1;
+	return(n & ~low_bits);
+}
+
+/********************************************************//**
+Raises a 64-bit integer to the next multiple of align_no, if it is not
+one already. @return n rounded upward */
+UNIV_INLINE
+ib_uint64_t
+ut_uint64_align_up(
+/*===============*/
+	ib_uint64_t	 n,		/*!< in: number to be rounded */
+	ulint		 align_no)	/*!< in: align by this number
+					which must be a power of 2 */
+{
+	ut_ad(align_no > 0);
+	ut_ad(ut_is_2pow(align_no));
+
+	/* Add (align_no - 1), then clear the low bits. */
+	const ib_uint64_t	low_bits = (ib_uint64_t) align_no - 1;
+	return((n + low_bits) & ~low_bits);
+}
+
+/*********************************************************//**
+Rounds a pointer up to the nearest address that is a multiple of align_no.
+@return	aligned pointer */
+UNIV_INLINE
+void*
+ut_align(
+/*=====*/
+	const void*	ptr,		/*!< in: pointer */
+	ulint		align_no)	/*!< in: alignment, a power of 2 */
+{
+	ut_ad(align_no > 0);
+	ut_ad(((align_no - 1) & align_no) == 0);
+	ut_ad(ptr);
+	ut_ad(sizeof(void*) == sizeof(ulint));
+
+	const ulint	low_bits = align_no - 1;
+	return((void*) ((((ulint) ptr) + low_bits) & ~low_bits));
+}
+
+/*********************************************************//**
+Rounds a pointer down to the nearest address that is a multiple of
+align_no.
+@return	aligned pointer */
+UNIV_INLINE
+void*
+ut_align_down(
+/*==========*/
+	const void*	ptr,		/*!< in: pointer */
+	ulint		align_no)	/*!< in: alignment, a power of 2 */
+{
+	ut_ad(align_no > 0);
+	ut_ad(((align_no - 1) & align_no) == 0);
+	ut_ad(ptr);
+	ut_ad(sizeof(void*) == sizeof(ulint));
+
+	const ulint	low_bits = align_no - 1;
+	return((void*) (((ulint) ptr) & ~low_bits));
+}
+
+/*********************************************************//**
+Computes how far a pointer lies above the nearest lower address that is
+a multiple of align_no.
+@return	distance from aligned pointer */
+UNIV_INLINE
+ulint
+ut_align_offset(
+/*============*/
+	const void*	ptr,		/*!< in: pointer */
+	ulint		align_no)	/*!< in: alignment, a power of 2 */
+{
+	ut_ad(align_no > 0);
+	ut_ad(((align_no - 1) & align_no) == 0);
+	ut_ad(ptr);
+	ut_ad(sizeof(void*) == sizeof(ulint));
+
+	const ulint	low_bits = align_no - 1;
+	return(((ulint) ptr) & low_bits);
+}
+
+/*****************************************************************//**
+Reads one bit of a ulint; bit 0 is the least significant bit.
+@return	TRUE if the nth bit is 1, FALSE if it is 0 */
+UNIV_INLINE
+ibool
+ut_bit_get_nth(
+/*===========*/
+	ulint	a,	/*!< in: ulint */
+	ulint	n)	/*!< in: position of the requested bit */
+{
+	ut_ad(n < 8 * sizeof(ulint));
+#if TRUE != 1
+# error "TRUE != 1"
+#endif
+	return((a >> n) & 1);
+}
+
+/*****************************************************************//**
+Sets or clears one bit of a ulint; bit 0 is the least significant bit.
+@return	copy of a with the nth bit set as requested */
+UNIV_INLINE
+ulint
+ut_bit_set_nth(
+/*===========*/
+	ulint	a,	/*!< in: ulint */
+	ulint	n,	/*!< in: position of the bit to change */
+	ibool	val)	/*!< in: value for the bit to set */
+{
+	ut_ad(n < 8 * sizeof(ulint));
+#if TRUE != 1
+# error "TRUE != 1"
+#endif
+	const ulint	mask = (ulint) 1 << n;
+
+	/* Any non-zero val sets the bit; zero clears it. */
+
+	return(val ? (a | mask) : (a & ~mask));
+}
diff --git a/storage/innobase/include/ut0counter.h b/storage/innobase/include/ut0counter.h
new file mode 100644
index 00000000000..fe0f36dfff2
--- /dev/null
+++ b/storage/innobase/include/ut0counter.h
@@ -0,0 +1,203 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/ut0counter.h
+
+Counter utility class
+
+Created 2012/04/12 by Sunny Bains
+*******************************************************/
+
+#ifndef UT0COUNTER_H
+#define UT0COUNTER_H
+
+#include "univ.i"
+#include <string.h>
+#include "os0thread.h"
+
+/** CPU cache line size in bytes. This is a fixed compile-time value; it
+is divided by sizeof(Type) below to get the number of array elements
+that make up one cache line. */
+#define CACHE_LINE_SIZE		64
+
+/** Default number of slots to use in ib_counter_t (the default value of
+its N template parameter) */
+#define IB_N_SLOTS		64
+
+/** Maps a slot index to an element offset in the counter array. */
+template <typename Type, int N>
+struct generic_indexer_t {
+	/** Default constructor/destructor should be OK. */
+
+	/** Slots are spaced one cache line apart, and the first cache
+	line of the array is skipped.
+	@param index	slot index; it is reduced modulo N
+	@return offset within m_counter */
+	size_t offset(size_t index) const UNIV_NOTHROW {
+		const size_t	elems_per_line = CACHE_LINE_SIZE / sizeof(Type);
+		const size_t	slot = (index % N) + 1;
+
+		return(slot * elems_per_line);
+	}
+};
+
+#ifdef HAVE_SCHED_GETCPU
+#include <utmpx.h>
+/** Use the cpu id to index into the counter array. If it fails then
+use the thread id. */
+template <typename Type, int N>
+struct get_sched_indexer_t : public generic_indexer_t<Type, N> {
+	/** Default constructor/destructor should be OK. */
+
+	/** @return result from sched_getcpu(), the thread id if it fails. */
+	size_t get_rnd_index() const UNIV_NOTHROW {
+
+		/* sched_getcpu() returns a signed int and reports failure
+		as -1. Test the signed value before converting it to the
+		unsigned return type, instead of comparing a size_t
+		with -1. */
+		const int	cpu = sched_getcpu();
+
+		if (cpu == -1) {
+			return((lint) os_thread_get_curr_id());
+		}
+
+		return((size_t) cpu);
+	}
+};
+#endif /* HAVE_SCHED_GETCPU */
+
+/** Use the thread id to index into the counter array. */
+template <typename Type, int N>
+struct thread_id_indexer_t : public generic_indexer_t<Type, N> {
+	/** The default constructor/destructor are OK. */
+
+	/** Picks a slot index for the calling thread.
+	@return the current thread id cast to an integer. It stands in
+	for a random number; where the thread id is represented as a
+	pointer, it may not work as effectively. */
+	size_t get_rnd_index() const UNIV_NOTHROW {
+		const lint	thread_id = (lint) os_thread_get_curr_id();
+
+		return(thread_id);
+	}
+};
+
+/** For counters where N=1 */
+template <typename Type, int N=1>
+struct single_indexer_t {
+	/** The default constructor/destructor are OK. */
+
+	/** The single slot sits one cache line into the array; the
+	index argument is ignored.
+	@return offset within m_counter */
+	size_t offset(size_t index) const UNIV_NOTHROW {
+		ut_ad(N == 1);
+
+		const size_t	elems_per_line = CACHE_LINE_SIZE / sizeof(Type);
+
+		return(elems_per_line);
+	}
+
+	/** @return 1 */
+	size_t get_rnd_index() const UNIV_NOTHROW {
+		ut_ad(N == 1);
+
+		const size_t	only_index = 1;
+
+		return(only_index);
+	}
+};
+
+/** Class for using fuzzy counters. The counter is not protected by any
+mutex and the results are not guaranteed to be 100% accurate but close
+enough. Creates an array of counters and separates each element by the
+CACHE_LINE_SIZE bytes.
+@tparam Type	type of each counter element
+@tparam N	number of slots
+@tparam Indexer	policy providing offset() and get_rnd_index() */
+template <
+	typename Type,
+	int N = IB_N_SLOTS,
+	template<typename, int> class Indexer = thread_id_indexer_t>
+class ib_counter_t {
+public:
+	/** Zero-fills the whole counter array with memset(). */
+	ib_counter_t() { memset(m_counter, 0x0, sizeof(m_counter)); }
+
+	/** In debug builds, checks the padding before destruction. */
+	~ib_counter_t()
+	{
+		ut_ad(validate());
+	}
+
+	/** In UNIV_DEBUG builds, asserts that the padding elements
+	between the slots are still zero. Does nothing otherwise.
+	@return always true */
+	bool validate() UNIV_NOTHROW {
+#ifdef UNIV_DEBUG
+		size_t	n = (CACHE_LINE_SIZE / sizeof(Type));
+
+		/* Check that we aren't writing outside our defined bounds.
+		With the indexers defined in this file a counter is only
+		ever stored at a multiple of n, so all n - 1 elements that
+		follow each multiple must still be zero. */
+		for (size_t i = 0; i < UT_ARR_SIZE(m_counter); i += n) {
+			for (size_t j = 1; j < n; ++j) {
+				ut_ad(m_counter[i + j] == 0);
+			}
+		}
+#endif /* UNIV_DEBUG */
+		return(true);
+	}
+
+	/** If you can't use a good index id. Increment by 1. */
+	void inc() UNIV_NOTHROW { add(1); }
+
+	/** If you can't use a good index id.
+	@param n	the amount to increment */
+	void add(Type n) UNIV_NOTHROW {
+		size_t	i = m_policy.offset(m_policy.get_rnd_index());
+
+		ut_ad(i < UT_ARR_SIZE(m_counter));
+
+		m_counter[i] += n;
+	}
+
+	/** Use this if you can use a unique identifier, saves a
+	call to get_rnd_index().
+	@param index	index into a slot
+	@param n	amount to increment */
+	void add(size_t index, Type n) UNIV_NOTHROW {
+		size_t	i = m_policy.offset(index);
+
+		ut_ad(i < UT_ARR_SIZE(m_counter));
+
+		m_counter[i] += n;
+	}
+
+	/** If you can't use a good index id. Decrement by 1. */
+	void dec() UNIV_NOTHROW { sub(1); }
+
+	/** If you can't use a good index id.
+	@param n	the amount to decrement */
+	void sub(Type n) UNIV_NOTHROW {
+		size_t	i = m_policy.offset(m_policy.get_rnd_index());
+
+		ut_ad(i < UT_ARR_SIZE(m_counter));
+
+		m_counter[i] -= n;
+	}
+
+	/** Use this if you can use a unique identifier, saves a
+	call to get_rnd_index().
+	@param index	index into a slot
+	@param n	amount to decrement */
+	void sub(size_t index, Type n) UNIV_NOTHROW {
+		size_t	i = m_policy.offset(index);
+
+		ut_ad(i < UT_ARR_SIZE(m_counter));
+
+		m_counter[i] -= n;
+	}
+
+	/** Sums the N slots.
+	@return total value - not 100% accurate, since it is not atomic. */
+	operator Type() const UNIV_NOTHROW {
+		Type	total = 0;
+
+		/* N is a signed template parameter; convert it once so the
+		loop condition compares like with like. */
+		for (size_t i = 0; i < size_t(N); ++i) {
+			total += m_counter[m_policy.offset(i)];
+		}
+
+		return(total);
+	}
+
+private:
+	/** Indexer into the array */
+	Indexer<Type, N>m_policy;
+
+	/** Slot 0 is unused. The array holds N + 1 cache lines worth
+	of elements. */
+	Type		m_counter[(N + 1) * (CACHE_LINE_SIZE / sizeof(Type))];
+};
+
+#endif /* UT0COUNTER_H */
diff --git a/storage/innobase/include/ut0crc32.h b/storage/innobase/include/ut0crc32.h
new file mode 100644
index 00000000000..86217692764
--- /dev/null
+++ b/storage/innobase/include/ut0crc32.h
@@ -0,0 +1,51 @@
+/*****************************************************************************
+
+Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/ut0crc32.h
+CRC32 implementation
+
+Created Aug 10, 2011 Vasil Dimov
+*******************************************************/
+
+#ifndef ut0crc32_h
+#define ut0crc32_h
+
+#include "univ.i"
+
+/********************************************************************//**
+Initializes the data structures used by ut_crc32(). Does not do any
+allocations, would not hurt if called twice, but would be pointless. */
+UNIV_INTERN
+void
+ut_crc32_init();
+/*===========*/
+
+/********************************************************************//**
+Type of a function that calculates CRC32.
+@param ptr	- data over which to calculate CRC32.
+@param len	- data length in bytes.
+@return CRC32 (CRC-32C, using the GF(2) primitive polynomial 0x11EDC6F41,
+or 0x1EDC6F41 without the high-order bit) */
+typedef ib_uint32_t (*ib_ut_crc32_t)(const byte* ptr, ulint len);
+
+/** Function pointer of type ib_ut_crc32_t through which CRC32 is
+calculated.
+NOTE(review): presumably assigned by ut_crc32_init() - confirm against
+the definition. */
+extern ib_ut_crc32_t	ut_crc32;
+
+/** NOTE(review): the name suggests this tells whether a hardware (SSE)
+implementation is in use - confirm against the definition. */
+extern bool	ut_crc32_sse2_enabled;
+
+#endif /* ut0crc32_h */
diff --git a/storage/innobase/include/ut0dbg.h b/storage/innobase/include/ut0dbg.h
new file mode 100644
index 00000000000..6a4afe99597
--- /dev/null
+++ b/storage/innobase/include/ut0dbg.h
@@ -0,0 +1,132 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/*****************************************************************//**
+@file include/ut0dbg.h
+Debug utilities for Innobase
+
+Created 1/30/1994 Heikki Tuuri
+**********************************************************************/
+
+#ifndef ut0dbg_h
+#define ut0dbg_h
+
+#ifdef UNIV_INNOCHECKSUM
+/* When UNIV_INNOCHECKSUM is defined, the assertion macros map directly
+onto assert(). This header does not include <assert.h> in this branch, so
+the including file has to provide it. */
+#define ut_a		assert
+#define ut_ad		assert
+#define ut_error	assert(0)
+#else /* !UNIV_INNOCHECKSUM */
+
+#include "univ.i"
+#include <stdlib.h>
+#include "os0thread.h"
+
+#if defined(__GNUC__) && (__GNUC__ > 2)
+/** Test if an assertion fails.
+@param EXPR	assertion expression
+@return		nonzero if the assertion fails (EXPR evaluates to zero),
+		zero if EXPR holds */
+# define UT_DBG_FAIL(EXPR) UNIV_UNLIKELY(!((ulint)(EXPR)))
+#else
+/** This is used to eliminate compiler warnings */
+extern ulint	ut_dbg_zero;
+/** Test if an assertion fails.
+@param EXPR	assertion expression
+@return		nonzero if the assertion fails (EXPR evaluates to zero),
+		zero if EXPR holds; this assumes ut_dbg_zero is 0 */
+# define UT_DBG_FAIL(EXPR) !((ulint)(EXPR) + ut_dbg_zero)
+#endif
+
+/*************************************************************//**
+Report a failed assertion. Only the file argument is declared nonnull;
+ut_error passes 0 as expr. */
+UNIV_INTERN
+void
+ut_dbg_assertion_failed(
+/*====================*/
+	const char*	expr,	/*!< in: the failed assertion, may be 0 */
+	const char*	file,	/*!< in: source file containing the assertion */
+	ulint		line)	/*!< in: line number of the assertion */
+	UNIV_COLD __attribute__((nonnull(2)));
+
+/** Abort the execution by calling abort(). */
+# define UT_DBG_PANIC abort()
+
+/** Abort execution if EXPR does not evaluate to nonzero. On failure the
+stringified expression, file name and line number are passed to
+ut_dbg_assertion_failed() before UT_DBG_PANIC. Wrapped in do/while (0)
+so that it behaves as a single statement.
+@param EXPR	assertion expression that should hold */
+#define ut_a(EXPR) do {						\
+	if (UT_DBG_FAIL(EXPR)) {				\
+		ut_dbg_assertion_failed(#EXPR,			\
+				__FILE__, (ulint) __LINE__);	\
+		UT_DBG_PANIC;					\
+	}							\
+} while (0)
+
+/** Abort execution unconditionally. Reports the file name and line
+number, with 0 in place of an expression string, and then panics. */
+#define ut_error do {						\
+	ut_dbg_assertion_failed(0, __FILE__, (ulint) __LINE__);	\
+	UT_DBG_PANIC;						\
+} while (0)
+
+#ifdef UNIV_DEBUG
+/** Debug assertion. Does nothing unless UNIV_DEBUG is defined. */
+#define ut_ad(EXPR)	ut_a(EXPR)
+/** Debug statement. Does nothing unless UNIV_DEBUG is defined. */
+#define ut_d(EXPR)	do {EXPR;} while (0)
+#else
+/** Debug assertion. Does nothing unless UNIV_DEBUG is defined. Here it
+expands to nothing, so EXPR is not evaluated at all. */
+#define ut_ad(EXPR)
+/** Debug statement. Does nothing unless UNIV_DEBUG is defined. Here it
+expands to nothing, so EXPR is not executed. */
+#define ut_d(EXPR)
+#endif
+
+/** Silence warnings about an unused variable by doing a null assignment.
+The expansion is a self-assignment, so A must be an assignable lvalue.
+@param A	the unused variable */
+#define UT_NOT_USED(A)	A = A
+
+#ifdef UNIV_COMPILE_TEST_FUNCS
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+/** Structure used for recording usage statistics. It is the argument of
+speedo_reset() (out) and speedo_show() (in). */
+struct speedo_t {
+	struct rusage	ru;	/*!< getrusage() result */
+	struct timeval	tv;	/*!< gettimeofday() result */
+};
+
+/*******************************************************************//**
+Resets a speedo (records the current time in it). */
+UNIV_INTERN
+void
+speedo_reset(
+/*=========*/
+	speedo_t*	speedo);	/*!< out: speedo */
+
+/*******************************************************************//**
+Shows the time elapsed and usage statistics since the last reset of a
+speedo. */
+UNIV_INTERN
+void
+speedo_show(
+/*========*/
+	const speedo_t*	speedo);	/*!< in: speedo */
+
+#endif /* UNIV_COMPILE_TEST_FUNCS */
+
+#endif /* !UNIV_INNOCHECKSUM */
+
+#endif
diff --git a/storage/innobase/include/ut0list.h b/storage/innobase/include/ut0list.h
new file mode 100644
index 00000000000..29fc8669ce4
--- /dev/null
+++ b/storage/innobase/include/ut0list.h
@@ -0,0 +1,180 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/ut0list.h
+A double-linked list
+
+Created 4/26/2006 Osku Salerma
+************************************************************************/
+
+/*******************************************************************//**
+A double-linked list. This differs from the one in ut0lst.h in that in this
+one, each list node contains a pointer to the data, whereas the one in
+ut0lst.h uses a strategy where the list pointers are embedded in the data
+items themselves.
+
+Use this one when you need to store arbitrary data in the list where you
+can't embed the list pointers in the data, if a data item needs to be
+stored in multiple lists, etc.
+
+Note about the memory management: ib_list_t is a fixed-size struct whose
+allocation/deallocation is done through ib_list_create/ib_list_free, but the
+memory for the list nodes is allocated through a user-given memory heap,
+which can either be the same for all nodes or vary per node. Most users will
+probably want to create a memory heap to store the item-specific data, and
+pass in this same heap to the list node creation functions, thus
+automatically freeing the list node when the item's heap is freed.
+
+************************************************************************/
+
+#ifndef IB_LIST_H
+#define IB_LIST_H
+
+#include "mem0mem.h"
+
+struct ib_list_t;
+struct ib_list_node_t;
+
+/****************************************************************//**
+Create a new list using mem_alloc. Lists created with this function must be
+freed with ib_list_free.
+@return	list */
+UNIV_INTERN
+ib_list_t*
+ib_list_create(void);
+/*=================*/
+
+
+/****************************************************************//**
+Create a new list using the given heap. ib_list_free MUST NOT BE CALLED for
+lists created with this function.
+@return	list */
+UNIV_INTERN
+ib_list_t*
+ib_list_create_heap(
+/*================*/
+	mem_heap_t*	heap);	/*!< in: memory heap to use */
+
+/****************************************************************//**
+Free a list. */
+UNIV_INTERN
+void
+ib_list_free(
+/*=========*/
+	ib_list_t*	list);	/*!< in: list */
+
+/****************************************************************//**
+Add the data to the start of the list.
+@return	new list node */
+UNIV_INTERN
+ib_list_node_t*
+ib_list_add_first(
+/*==============*/
+	ib_list_t*	list,	/*!< in: list */
+	void*		data,	/*!< in: data */
+	mem_heap_t*	heap);	/*!< in: memory heap to use */
+
+/****************************************************************//**
+Add the data to the end of the list.
+@return	new list node */
+UNIV_INTERN
+ib_list_node_t*
+ib_list_add_last(
+/*=============*/
+	ib_list_t*	list,	/*!< in: list */
+	void*		data,	/*!< in: data */
+	mem_heap_t*	heap);	/*!< in: memory heap to use */
+
+/****************************************************************//**
+Add the data after the indicated node.
+@return	new list node */
+UNIV_INTERN
+ib_list_node_t*
+ib_list_add_after(
+/*==============*/
+	ib_list_t*	list,		/*!< in: list */
+	ib_list_node_t*	prev_node,	/*!< in: node preceding new node (can
+					be NULL) */
+	void*		data,		/*!< in: data */
+	mem_heap_t*	heap);		/*!< in: memory heap to use */
+
+/****************************************************************//**
+Remove the node from the list. */
+UNIV_INTERN
+void
+ib_list_remove(
+/*===========*/
+	ib_list_t*	list,	/*!< in: list */
+	ib_list_node_t*	node);	/*!< in: node to remove */
+
+/****************************************************************//**
+Get the first node in the list.
+@return	first node, or NULL */
+UNIV_INLINE
+ib_list_node_t*
+ib_list_get_first(
+/*==============*/
+	ib_list_t*	list);	/*!< in: list */
+
+/****************************************************************//**
+Get the last node in the list.
+@return	last node, or NULL */
+UNIV_INLINE
+ib_list_node_t*
+ib_list_get_last(
+/*=============*/
+	ib_list_t*	list);	/*!< in: list */
+
+/********************************************************************
+Check if list is empty. */
+UNIV_INLINE
+ibool
+ib_list_is_empty(
+/*=============*/
+					/* out: TRUE if empty else FALSE */
+	const ib_list_t*	list);	/* in: list */
+
+/* List. Holds the two ends of the list; ib_list_is_empty() treats the
+list as empty when both are NULL. */
+struct ib_list_t {
+	ib_list_node_t*		first;		/*!< first node */
+	ib_list_node_t*		last;		/*!< last node */
+	ibool			is_heap_list;	/*!< TRUE if this list was
+						allocated through a heap */
+};
+
+/* A list node. The node carries a pointer to the user data instead of
+being embedded in the data item. */
+struct ib_list_node_t {
+	ib_list_node_t*		prev;		/*!< previous node */
+	ib_list_node_t*		next;		/*!< next node */
+	void*			data;		/*!< user data */
+};
+
+/* Quite often, the only additional piece of data you need is the per-item
+memory heap, so we have this generic struct available to use in those
+cases. It pairs a heap pointer with a user data pointer. */
+struct ib_list_helper_t {
+	mem_heap_t*	heap;		/*!< memory heap */
+	void*		data;		/*!< user data */
+};
+
+#ifndef UNIV_NONINL
+#include "ut0list.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/ut0list.ic b/storage/innobase/include/ut0list.ic
new file mode 100644
index 00000000000..d9dcb2eac99
--- /dev/null
+++ b/storage/innobase/include/ut0list.ic
@@ -0,0 +1,60 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/ut0list.ic
+A double-linked list
+
+Created 4/26/2006 Osku Salerma
+************************************************************************/
+
+/****************************************************************//**
+Returns the node at the head of the list.
+@return	first node, or NULL */
+UNIV_INLINE
+ib_list_node_t*
+ib_list_get_first(
+/*==============*/
+	ib_list_t*	list)	/*!< in: list */
+{
+	ib_list_node_t*	head = list->first;
+
+	return(head);
+}
+
+/****************************************************************//**
+Returns the node at the tail of the list.
+@return	last node, or NULL */
+UNIV_INLINE
+ib_list_node_t*
+ib_list_get_last(
+/*=============*/
+	ib_list_t*	list)	/*!< in: list */
+{
+	ib_list_node_t*	tail = list->last;
+
+	return(tail);
+}
+
+/********************************************************************
+Tells whether the list has no nodes, i.e. both end pointers are unset. */
+UNIV_INLINE
+ibool
+ib_list_is_empty(
+/*=============*/
+					/* out: TRUE if empty else FALSE */
+	const ib_list_t*	list)	/* in: list */
+{
+	/* Empty means neither a first nor a last node is recorded. */
+	return(!list->first && !list->last);
+}
diff --git a/storage/innobase/include/ut0lst.h b/storage/innobase/include/ut0lst.h
new file mode 100644
index 00000000000..b53e7ade4c1
--- /dev/null
+++ b/storage/innobase/include/ut0lst.h
@@ -0,0 +1,408 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/ut0lst.h
+List utilities
+
+Created 9/10/1995 Heikki Tuuri
+***********************************************************************/
+
+#ifndef ut0lst_h
+#define ut0lst_h
+
+#include "univ.i"
+
+/*******************************************************************//**
+Return the byte offset of field F within the POD object that T points
+to. It is computed by subtracting the object's address from the field's
+address, so T is evaluated twice.
+@param T	- POD pointer
+@param F	- Field in T */
+#define IB_OFFSETOF(T, F)						\
+	(reinterpret_cast<byte*>(&(T)->F) - reinterpret_cast<byte*>(T))
+
+/* This module implements the two-way linear list which should be used
+if a list is used in the database. Note that a single struct may belong
+to two or more lists, provided that the list are given different names.
+An example of the usage of the lists can be found in fil0fil.cc. */
+
+/*******************************************************************//**
+The two-way list base node. The base node contains pointers
+to both ends of the list and a count of nodes in the list (excluding
+the base node from the count). UT_LIST_BASE_NODE_T(TYPE) names this
+type for a given node type.
+@param TYPE	the name of the list node data type */
+template <typename TYPE>
+struct ut_list_base {
+	typedef TYPE elem_type;
+
+	ulint	count;	/*!< count of nodes in list */
+	TYPE*	start;	/*!< pointer to list start, NULL if empty */
+	TYPE*	end;	/*!< pointer to list end, NULL if empty */
+};
+
+#define UT_LIST_BASE_NODE_T(TYPE)	ut_list_base<TYPE>
+
+/*******************************************************************//**
+The struct that should be embedded in the nodes of the list; the node
+type must be a struct.
+This struct contains the pointers to next and previous nodes in the list.
+The name of the field in the node struct should be the name given
+to the list. UT_LIST_NODE_T(TYPE) names this type for a given node type.
+@param TYPE	the list node type name */
+/* Example:
+struct LRU_node_t {
+	UT_LIST_NODE_T(LRU_node_t)	LRU_list;
+	...
+}
+The example implements an LRU list of name LRU_list. Its nodes are of type
+LRU_node_t. */
+
+template <typename TYPE>
+struct ut_list_node {
+	TYPE* 	prev;	/*!< pointer to the previous node,
+			NULL if start of list */
+	TYPE* 	next;	/*!< pointer to next node, NULL if end of list */
+};
+
+#define UT_LIST_NODE_T(TYPE)	ut_list_node<TYPE>
+
+/*******************************************************************//**
+Locates the embedded list node that lives offset bytes into an element.
+Aborts if the offset does not fall inside the element.
+@param elem	- list element
+@param offset	- byte offset of the list node within the element
+@return reference to list node. */
+template <typename Type>
+ut_list_node<Type>&
+ut_elem_get_node(Type&	elem, size_t offset)
+{
+	ut_a(offset < sizeof(elem));
+
+	/* Step offset bytes into the element and view that address as
+	the list node. */
+	byte*	node_addr = reinterpret_cast<byte*>(&elem) + offset;
+
+	return(*reinterpret_cast<ut_list_node<Type>*>(node_addr));
+}
+
+/*******************************************************************//**
+Initializes the base node of a two-way list: zero count, NULL start and
+NULL end. The expansion is a brace-enclosed block, not a do/while (0),
+and BASE is evaluated three times. The line holding the closing brace
+ends in a continuation backslash, so the line that follows the macro
+belongs to the definition and has to stay blank.
+@param BASE	the list base node
+*/
+#define UT_LIST_INIT(BASE)\
+{\
+	(BASE).count = 0;\
+	(BASE).start = NULL;\
+	(BASE).end   = NULL;\
+}\
+
+/*******************************************************************//**
+Links an element in at the head of a two-way linked list.
+@param list	the base node (not a pointer to it)
+@param elem	the element to add
+@param offset	offset of list node in elem. */
+template <typename List, typename Type>
+void
+ut_list_prepend(
+	List&		list,
+	Type&		elem,
+	size_t		offset)
+{
+	ut_list_node<Type>&	links = ut_elem_get_node(elem, offset);
+
+	/* The new head has no predecessor and is followed by the old
+	head, if there is one. */
+	links.next = list.start;
+	links.prev = 0;
+
+	if (list.start != 0) {
+		ut_list_node<Type>&	old_head_links =
+			ut_elem_get_node(*list.start, offset);
+
+		ut_ad(list.start != &elem);
+
+		old_head_links.prev = &elem;
+	}
+
+	if (list.end == 0) {
+		/* No tail recorded yet: the element is the tail too. */
+		list.end = &elem;
+	}
+
+	list.start = &elem;
+	list.count += 1;
+}
+
+/*******************************************************************//**
+Adds the node as the first element in a two-way linked list.
+ELEM appears twice in the expansion, so it should be free of side effects.
+@param NAME	list name
+@param LIST	the base node (not a pointer to it)
+@param ELEM	pointer to the element to add */
+#define UT_LIST_ADD_FIRST(NAME, LIST, ELEM)	\
+	ut_list_prepend(LIST, *(ELEM), IB_OFFSETOF(ELEM, NAME))
+
+/*******************************************************************//**
+Links an element in at the tail of a two-way linked list.
+@param list	list
+@param elem	the element to add
+@param offset	offset of list node in elem */
+template <typename List, typename Type>
+void
+ut_list_append(
+	List&		list,
+	Type&		elem,
+	size_t		offset)
+{
+	ut_list_node<Type>&	links = ut_elem_get_node(elem, offset);
+
+	/* The new tail has no successor and is preceded by the old
+	tail, if there is one. */
+	links.prev = list.end;
+	links.next = 0;
+
+	if (list.end != 0) {
+		ut_list_node<Type>&	old_tail_links =
+			ut_elem_get_node(*list.end, offset);
+
+		ut_ad(list.end != &elem);
+
+		old_tail_links.next = &elem;
+	}
+
+	if (list.start == 0) {
+		/* No head recorded yet: the element is the head too. */
+		list.start = &elem;
+	}
+
+	list.end = &elem;
+	list.count += 1;
+}
+
+/*******************************************************************//**
+Adds the node as the last element in a two-way linked list.
+ELEM appears twice in the expansion, so it should be free of side effects.
+@param NAME	list name
+@param LIST	list
+@param ELEM	pointer to the element to add */
+#define UT_LIST_ADD_LAST(NAME, LIST, ELEM)\
+	ut_list_append(LIST, *(ELEM), IB_OFFSETOF(ELEM, NAME))
+
+/*******************************************************************//**
+Links elem2 into a list directly behind elem1.
+@param list	the base node
+@param elem1	node after which elem2 is inserted
+@param elem2	node being inserted after elem1
+@param offset	offset of list node in elem1 and elem2 */
+template <typename List, typename Type>
+void
+ut_list_insert(
+	List&		list,
+	Type&		elem1,
+	Type&		elem2,
+	size_t		offset)
+{
+	ut_ad(&elem1 != &elem2);
+
+	ut_list_node<Type>&	anchor_links = ut_elem_get_node(elem1, offset);
+	ut_list_node<Type>&	new_links = ut_elem_get_node(elem2, offset);
+
+	/* Whatever followed elem1 now follows elem2. */
+	Type* const		successor = anchor_links.next;
+
+	new_links.prev = &elem1;
+	new_links.next = successor;
+
+	if (successor != NULL) {
+		ut_list_node<Type>&	successor_links =
+			ut_elem_get_node(*successor, offset);
+
+		successor_links.prev = &elem2;
+	}
+
+	anchor_links.next = &elem2;
+
+	if (list.end == &elem1) {
+		/* elem1 was the tail, so elem2 takes over that role. */
+		list.end = &elem2;
+	}
+
+	list.count += 1;
+}
+
+/*******************************************************************//**
+Inserts a ELEM2 after ELEM1 in a list.
+ELEM1 appears twice in the expansion, so it should be free of side effects.
+@param NAME	list name
+@param LIST	the base node
+@param ELEM1	pointer to the node after which ELEM2 is inserted
+@param ELEM2	pointer to the node being inserted after ELEM1 */
+#define UT_LIST_INSERT_AFTER(NAME, LIST, ELEM1, ELEM2)\
+	ut_list_insert(LIST, *(ELEM1), *(ELEM2), IB_OFFSETOF(ELEM1, NAME))
+
+#ifdef UNIV_LIST_DEBUG
+/** Invalidate the pointers in a list node by setting both next and prev
+to (Type*) -1. The expansion refers to a type called Type, which must be
+in scope where the macro is used (ut_list_remove() has a template
+parameter of that name).
+@param N	the list node (not a pointer to it) of the removed element */
+# define UT_LIST_REMOVE_CLEAR(N)					\
+	(N).next = (Type*) -1;						\
+	(N).prev = (N).next
+#else
+/** Invalidate the pointers in a list node. Expands to nothing unless
+UNIV_LIST_DEBUG is defined.
+@param N	the list node (not a pointer to it) of the removed element */
+# define UT_LIST_REMOVE_CLEAR(N)
+#endif /* UNIV_LIST_DEBUG */
+
+/*******************************************************************//**
+Unlinks an element from a two-way linked list. Aborts if the list count
+is already zero.
+@param list	the base node (not a pointer to it)
+@param elem	node to be removed from the list
+@param offset	offset of list node within elem */
+template <typename List, typename Type>
+void
+ut_list_remove(
+	List&		list,
+ 	Type&		elem,
+	size_t		offset)
+{
+	ut_list_node<Type>&	links = ut_elem_get_node(elem, offset);
+
+	ut_a(list.count > 0);
+
+	Type* const	before = links.prev;
+	Type* const	after = links.next;
+
+	/* Repair the backward link: either the successor or, when elem
+	is the tail, the list end now refers to the predecessor. */
+	if (after == NULL) {
+		list.end = before;
+	} else {
+		ut_list_node<Type>&	after_links =
+			ut_elem_get_node(*after, offset);
+
+		after_links.prev = before;
+	}
+
+	/* Repair the forward link in the same way. */
+	if (before == NULL) {
+		list.start = after;
+	} else {
+		ut_list_node<Type>&	before_links =
+			ut_elem_get_node(*before, offset);
+
+		before_links.next = after;
+	}
+
+	UT_LIST_REMOVE_CLEAR(links);
+
+	list.count -= 1;
+}
+
+/*******************************************************************//**
+Removes a node from a two-way linked list.
+ELEM appears twice in the expansion, so it should be free of side effects.
+@param NAME	list name
+@param LIST	the base node (not a pointer to it)
+@param ELEM	pointer to the node to be removed from the list */
+#define UT_LIST_REMOVE(NAME, LIST, ELEM)				\
+	ut_list_remove(LIST, *(ELEM), IB_OFFSETOF(ELEM, NAME))
+
+/********************************************************************//**
+Gets the next node in a two-way list. Expands to a plain member access
+on the list node field NAME of N.
+@param NAME	list name
+@param N	pointer to a node
+@return		the successor of N in NAME, or NULL */
+#define UT_LIST_GET_NEXT(NAME, N)\
+	(((N)->NAME).next)
+
+/********************************************************************//**
+Gets the previous node in a two-way list. Expands to a plain member
+access on the list node field NAME of N.
+@param NAME	list name
+@param N	pointer to a node
+@return		the predecessor of N in NAME, or NULL */
+#define UT_LIST_GET_PREV(NAME, N)\
+	(((N)->NAME).prev)
+
+/********************************************************************//**
+Gets the number of nodes in a two-way list, i.e., its length, by reading
+the count field of the base node.
+@param BASE	the base node (not a pointer to it).
+@return		the number of nodes in the list */
+#define UT_LIST_GET_LEN(BASE)\
+	(BASE).count
+
+/********************************************************************//**
+Gets the first node in a two-way list.
+@param BASE	the base node (not a pointer to it)
+@return		first node, or NULL if the list is empty */
+#define UT_LIST_GET_FIRST(BASE)\
+	(BASE).start
+
+/********************************************************************//**
+Gets the last node in a two-way list.
+@param BASE	the base node (not a pointer to it)
+@return		last node, or NULL if the list is empty */
+#define UT_LIST_GET_LAST(BASE)\
+	(BASE).end
+
+/** Functor that does nothing with the element it is given. UT_LIST_CHECK
+passes it to ut_list_validate() when no per-element check is wanted. */
+struct	NullValidate { void operator()(const void* elem) { } };
+
+/********************************************************************//**
+Walks the list from start to end, calling the functor on every element,
+and aborts if the number of elements visited differs from list.count.
+@param list	base node (not a pointer to it)
+@param functor	Functor that is called for each element in the list
+@param node	pointer to member node within list element */
+template <typename List, class Functor>
+void
+ut_list_map(
+	List&		list,
+	ut_list_node<typename List::elem_type>
+			List::elem_type::*node,
+	Functor		functor)
+{
+	ulint				count = 0;
+	typename List::elem_type*	elem = list.start;
+
+	while (elem != 0) {
+		functor(elem);
+
+		/* The successor is read only after the functor has run. */
+		elem = (elem->*node).next;
+		++count;
+	}
+
+	ut_a(count == list.count);
+}
+
+/********************************************************************//**
+Checks the consistency of a two-way list by walking it in both
+directions. The forward walk is done by ut_list_map(); the backward walk
+follows the prev pointers from the end. The functor is called on every
+element in each direction, and each walk must visit exactly list.count
+elements.
+@param list	base node (not a pointer to it)
+@param functor	Functor that is called for each element in the list
+@param node	pointer to member node within list element */
+template <typename List, class Functor>
+void
+ut_list_validate(
+	List&		list,
+	ut_list_node<typename List::elem_type>
+			List::elem_type::*node,
+	Functor		functor = NullValidate())
+{
+	/* Forward pass. */
+	ut_list_map(list, node, functor);
+
+	/* Backward pass. */
+	ulint				count = 0;
+	typename List::elem_type*	elem = list.end;
+
+	while (elem != 0) {
+		functor(elem);
+
+		elem = (elem->*node).prev;
+		++count;
+	}
+
+	ut_a(count == list.count);
+}
+
+/********************************************************************//**
+Checks the consistency of a two-way list by calling ut_list_validate()
+with a pointer to the member NAME of TYPE.
+@param NAME		the name of the list
+@param TYPE		node type
+@param LIST		base node (not a pointer to it)
+@param FUNCTOR		called for each list element */
+#define UT_LIST_VALIDATE(NAME, TYPE, LIST, FUNCTOR)			\
+	ut_list_validate(LIST, &TYPE::NAME, FUNCTOR)
+
+/** Same as UT_LIST_VALIDATE, but with the do-nothing NullValidate
+functor, so only the list structure itself is checked.
+@param NAME		the name of the list
+@param TYPE		node type
+@param LIST		base node (not a pointer to it) */
+#define UT_LIST_CHECK(NAME, TYPE, LIST)					\
+	ut_list_validate(LIST, &TYPE::NAME, NullValidate())
+
+#endif /* ut0lst.h */
diff --git a/storage/innobase/include/ut0mem.h b/storage/innobase/include/ut0mem.h
new file mode 100644
index 00000000000..af7eb4e9b1d
--- /dev/null
+++ b/storage/innobase/include/ut0mem.h
@@ -0,0 +1,261 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/ut0mem.h
+Memory primitives
+
+Created 5/30/1994 Heikki Tuuri
+************************************************************************/
+
+#ifndef ut0mem_h
+#define ut0mem_h
+
+#include "univ.i"
+#include <string.h>
+#ifndef UNIV_HOTBACKUP
+# include "os0sync.h"
+
+/** The total amount of memory currently allocated from the operating
+system with os_mem_alloc_large() or malloc().  Does not count malloc()
+if srv_use_sys_malloc is set.  Protected by ut_list_mutex. */
+extern ulint		ut_total_allocated_memory;
+
+/** Mutex protecting ut_total_allocated_memory and ut_mem_block_list */
+extern os_fast_mutex_t	ut_list_mutex;
+#endif /* !UNIV_HOTBACKUP */
+
+/** Wrapper for memcpy(3).  Copy memory area when the source and
+target are not overlapping.
+* @param dest	in: copy to
+* @param sour	in: copy from
+* @param n	in: number of bytes to copy
+* @return	dest */
+UNIV_INLINE
+void*
+ut_memcpy(void* dest, const void* sour, ulint n);
+
+/** Wrapper for memmove(3).  Copy memory area when the source and
+target are overlapping.
+* @param dest	in: copy to
+* @param sour	in: copy from
+* @param n	in: number of bytes to copy
+* @return	dest */
+UNIV_INLINE
+void*
+ut_memmove(void* dest, const void* sour, ulint n);
+
+/** Wrapper for memcmp(3).  Compare memory areas.
+* @param str1	in: first memory block to compare
+* @param str2	in: second memory block to compare
+* @param n	in: number of bytes to compare
+* @return	negative, 0, or positive if str1 is smaller, equal,
+		or greater than str2, respectively. */
+UNIV_INLINE
+int
+ut_memcmp(const void* str1, const void* str2, ulint n);
+
+/**********************************************************************//**
+Initializes the mem block list at database startup. */
+UNIV_INTERN
+void
+ut_mem_init(void);
+/*=============*/
+
+/**********************************************************************//**
+Allocates memory.
+@return	own: allocated memory */
+UNIV_INTERN
+void*
+ut_malloc_low(
+/*==========*/
+	ulint	n,			/*!< in: number of bytes to allocate */
+	ibool	assert_on_error)	/*!< in: if TRUE, we crash mysqld if
+					the memory cannot be allocated */
+	__attribute__((malloc));
+/**********************************************************************//**
+Allocates memory. */
+#define ut_malloc(n) ut_malloc_low(n, TRUE)
+/**********************************************************************//**
+Frees a memory block allocated with ut_malloc. Freeing a NULL pointer is
+a nop. */
+UNIV_INTERN
+void
+ut_free(
+/*====*/
+	void* ptr);  /*!< in, own: memory block, can be NULL */
+#ifndef UNIV_HOTBACKUP
+/**********************************************************************//**
+Implements realloc. This is needed by /pars/lexyy.cc. Otherwise, you should not
+use this function because the allocation functions in mem0mem.h are the
+recommended ones in InnoDB.
+
+man realloc in Linux, 2004:
+
+       realloc()  changes the size of the memory block pointed to
+       by ptr to size bytes.  The contents will be  unchanged  to
+       the minimum of the old and new sizes; newly allocated mem-
+       ory will be uninitialized.  If ptr is NULL,  the	 call  is
+       equivalent  to malloc(size); if size is equal to zero, the
+       call is equivalent to free(ptr).	 Unless ptr is	NULL,  it
+       must  have  been	 returned by an earlier call to malloc(),
+       calloc() or realloc().
+
+RETURN VALUE
+       realloc() returns a pointer to the newly allocated memory,
+       which is suitably aligned for any kind of variable and may
+       be different from ptr, or NULL if the  request  fails.  If
+       size  was equal to 0, either NULL or a pointer suitable to
+       be passed to free() is returned.	 If realloc()  fails  the
+       original	 block	is  left  untouched  - it is not freed or
+       moved.
+@return	own: pointer to new mem block or NULL */
+UNIV_INTERN
+void*
+ut_realloc(
+/*=======*/
+	void*	ptr,	/*!< in: pointer to old block or NULL */
+	ulint	size);	/*!< in: desired size */
+/**********************************************************************//**
+Frees in shutdown all allocated memory not freed yet. */
+UNIV_INTERN
+void
+ut_free_all_mem(void);
+/*=================*/
+#endif /* !UNIV_HOTBACKUP */
+
+/** Wrapper for strcpy(3).  Copy a NUL-terminated string.
+* @param dest	in: copy to
+* @param sour	in: copy from
+* @return	dest */
+UNIV_INLINE
+char*
+ut_strcpy(char* dest, const char* sour);
+
+/** Wrapper for strlen(3).  Determine the length of a NUL-terminated string.
+* @param str	in: string
+* @return	length of the string in bytes, excluding the terminating NUL */
+UNIV_INLINE
+ulint
+ut_strlen(const char* str);
+
+/** Wrapper for strcmp(3).  Compare NUL-terminated strings.
+* @param str1	in: first string to compare
+* @param str2	in: second string to compare
+* @return	negative, 0, or positive if str1 is smaller, equal,
+		or greater than str2, respectively. */
+UNIV_INLINE
+int
+ut_strcmp(const char* str1, const char* str2);
+
+/**********************************************************************//**
+Copies up to size - 1 characters from the NUL-terminated string src to
+dst, NUL-terminating the result. Returns strlen(src), so truncation
+occurred if the return value >= size.
+@return	strlen(src) */
+UNIV_INTERN
+ulint
+ut_strlcpy(
+/*=======*/
+	char*		dst,	/*!< in: destination buffer */
+	const char*	src,	/*!< in: source buffer */
+	ulint		size);	/*!< in: size of destination buffer */
+
+/**********************************************************************//**
+Like ut_strlcpy, but if src doesn't fit in dst completely, copies the last
+(size - 1) bytes of src, not the first.
+@return	strlen(src) */
+UNIV_INTERN
+ulint
+ut_strlcpy_rev(
+/*===========*/
+	char*		dst,	/*!< in: destination buffer */
+	const char*	src,	/*!< in: source buffer */
+	ulint		size);	/*!< in: size of destination buffer */
+
+/**********************************************************************//**
+Return the number of times s2 occurs in s1. Overlapping instances of s2
+are only counted once.
+@return	the number of times s2 occurs in s1 */
+UNIV_INTERN
+ulint
+ut_strcount(
+/*========*/
+	const char*	s1,	/*!< in: string to search in */
+	const char*	s2);	/*!< in: string to search for */
+
+/**********************************************************************//**
+Replace every occurrence of s1 in str with s2. Overlapping instances of s1
+are only replaced once.
+@return	own: modified string, must be freed with mem_free() */
+UNIV_INTERN
+char*
+ut_strreplace(
+/*==========*/
+	const char*	str,	/*!< in: string to operate on */
+	const char*	s1,	/*!< in: string to replace */
+	const char*	s2);	/*!< in: string to replace s1 with */
+
+/**********************************************************************//**
+Concatenate 3 strings. The result is owned by the caller, see below. */
+
+char*
+ut_str3cat(
+/*=======*/
+				/* out, own: concatenated string, must be
+				freed with mem_free() */
+	const char*	s1,	/*!< in: string 1 */
+	const char*	s2,	/*!< in: string 2 */
+	const char*	s3);	/*!< in: string 3 */
+
+/**********************************************************************//**
+Converts a raw binary data to a NUL-terminated hex string. The output is
+truncated if there is not enough space in "hex", make sure "hex_size" is at
+least (2 * raw_size + 1) if you do not want this to happen. Returns the
+actual number of characters written to "hex" (including the NUL).
+@return	number of chars written */
+UNIV_INLINE
+ulint
+ut_raw_to_hex(
+/*==========*/
+	const void*	raw,		/*!< in: raw data */
+	ulint		raw_size,	/*!< in: "raw" length in bytes */
+	char*		hex,		/*!< out: hex string */
+	ulint		hex_size);	/*!< in: "hex" size in bytes */
+
+/*******************************************************************//**
+Adds single quotes to the start and end of string and escapes any quotes
+by doubling them. Returns the number of bytes that were written to "buf"
+(including the terminating NUL). If buf_size is too small then the
+trailing bytes from "str" are discarded.
+@return	number of bytes that were written */
+UNIV_INLINE
+ulint
+ut_str_sql_format(
+/*==============*/
+	const char*	str,		/*!< in: string */
+	ulint		str_len,	/*!< in: string length in bytes */
+	char*		buf,		/*!< out: output buffer */
+	ulint		buf_size);	/*!< in: output buffer size
+					in bytes */
+
+#ifndef UNIV_NONINL
+#include "ut0mem.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/ut0mem.ic b/storage/innobase/include/ut0mem.ic
new file mode 100644
index 00000000000..5c9071d52cc
--- /dev/null
+++ b/storage/innobase/include/ut0mem.ic
@@ -0,0 +1,317 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/ut0mem.ic
+Memory primitives
+
+Created 5/30/1994 Heikki Tuuri
+************************************************************************/
+
+#include "ut0byte.h"
+#include "mach0data.h"
+
+/** Wrapper for memcpy(3).  Copy a memory area; the source and target
+must not overlap (use ut_memmove() if they may).
+* @param dest	out: memory area to copy to
+* @param sour	in: memory area to copy from
+* @param n	in: number of bytes to copy
+* @return	dest */
+UNIV_INLINE
+void*
+ut_memcpy(void* dest, const void* sour, ulint n)
+{
+	return(memcpy(dest, sour, n));
+}
+
+/** Wrapper for memmove(3).  Copy a memory area; the source and target
+are allowed to overlap.
+* @param dest	out: memory area to copy to
+* @param sour	in: memory area to copy from
+* @param n	in: number of bytes to copy
+* @return	dest */
+UNIV_INLINE
+void*
+ut_memmove(void* dest, const void* sour, ulint n)
+{
+	return(memmove(dest, sour, n));
+}
+
+/** Wrapper for memcmp(3).  Compare the first n bytes of two memory areas.
+* @param str1	in: first memory block to compare
+* @param str2	in: second memory block to compare
+* @param n	in: number of bytes to compare
+* @return	negative, 0, or positive if str1 is smaller, equal,
+		or greater than str2, respectively. */
+UNIV_INLINE
+int
+ut_memcmp(const void* str1, const void* str2, ulint n)
+{
+	return(memcmp(str1, str2, n));
+}
+
+/** Wrapper for strcpy(3).  Copy a NUL-terminated string, NUL included.
+* @param dest	out: buffer to copy to; the size is not checked here
+* @param sour	in: NUL-terminated string to copy from
+* @return	dest */
+UNIV_INLINE
+char*
+ut_strcpy(char* dest, const char* sour)
+{
+	return(strcpy(dest, sour));
+}
+
+/** Wrapper for strlen(3).  Determine the length of a NUL-terminated string.
+* @param str	in: NUL-terminated string
+* @return	length of the string in bytes, excluding the terminating NUL */
+UNIV_INLINE
+ulint
+ut_strlen(const char* str)
+{
+	return(strlen(str));
+}
+
+/** Wrapper for strcmp(3).  Compare two NUL-terminated strings bytewise.
+* @param str1	in: first NUL-terminated string to compare
+* @param str2	in: second NUL-terminated string to compare
+* @return	negative, 0, or positive if str1 is smaller, equal,
+		or greater than str2, respectively. */
+UNIV_INLINE
+int
+ut_strcmp(const char* str1, const char* str2)
+{
+	return(strcmp(str1, str2));
+}
+
+/**********************************************************************//**
+Converts a raw binary data to a NUL-terminated hex string. The output is
+truncated if there is not enough space in "hex", make sure "hex_size" is at
+least (2 * raw_size + 1) if you do not want this to happen. Returns the
+actual number of characters written to "hex" (including the NUL).
+@return	number of chars written */
+UNIV_INLINE
+ulint
+ut_raw_to_hex(
+/*==========*/
+	const void*	raw,		/*!< in: raw data */
+	ulint		raw_size,	/*!< in: "raw" length in bytes */
+	char*		hex,		/*!< out: hex string */
+	ulint		hex_size)	/*!< in: "hex" size in bytes */
+{
+/* MK_UINT16(a, b) packs two chars so that a is the first byte in memory. */
+#ifdef WORDS_BIGENDIAN
+
+#define MK_UINT16(a, b) (((uint16) (a)) << 8 | (uint16) (b))
+
+#define UINT16_GET_A(u)	((unsigned char) ((u) >> 8))
+#define UINT16_GET_B(u)	((unsigned char) ((u) & 0xFF))
+
+#else /* WORDS_BIGENDIAN */
+
+#define MK_UINT16(a, b) (((uint16) (b)) << 8 | (uint16) (a))
+
+#define UINT16_GET_A(u)	((unsigned char) ((u) & 0xFF))
+#define UINT16_GET_B(u)	((unsigned char) ((u) >> 8))
+
+#endif /* WORDS_BIGENDIAN */
+/* The 16 table entries whose first hex digit is a. */
+#define MK_ALL_UINT16_WITH_A(a)	\
+	MK_UINT16(a, '0'),	\
+	MK_UINT16(a, '1'),	\
+	MK_UINT16(a, '2'),	\
+	MK_UINT16(a, '3'),	\
+	MK_UINT16(a, '4'),	\
+	MK_UINT16(a, '5'),	\
+	MK_UINT16(a, '6'),	\
+	MK_UINT16(a, '7'),	\
+	MK_UINT16(a, '8'),	\
+	MK_UINT16(a, '9'),	\
+	MK_UINT16(a, 'A'),	\
+	MK_UINT16(a, 'B'),	\
+	MK_UINT16(a, 'C'),	\
+	MK_UINT16(a, 'D'),	\
+	MK_UINT16(a, 'E'),	\
+	MK_UINT16(a, 'F')
+	/* hex_map[byte] holds the two upper-case hex digits of byte. */
+	static const uint16	hex_map[256] = {
+		MK_ALL_UINT16_WITH_A('0'),
+		MK_ALL_UINT16_WITH_A('1'),
+		MK_ALL_UINT16_WITH_A('2'),
+		MK_ALL_UINT16_WITH_A('3'),
+		MK_ALL_UINT16_WITH_A('4'),
+		MK_ALL_UINT16_WITH_A('5'),
+		MK_ALL_UINT16_WITH_A('6'),
+		MK_ALL_UINT16_WITH_A('7'),
+		MK_ALL_UINT16_WITH_A('8'),
+		MK_ALL_UINT16_WITH_A('9'),
+		MK_ALL_UINT16_WITH_A('A'),
+		MK_ALL_UINT16_WITH_A('B'),
+		MK_ALL_UINT16_WITH_A('C'),
+		MK_ALL_UINT16_WITH_A('D'),
+		MK_ALL_UINT16_WITH_A('E'),
+		MK_ALL_UINT16_WITH_A('F')
+	};
+	const unsigned char*	rawc;
+	ulint			read_bytes;
+	ulint			write_bytes;
+	ulint			i;
+
+	rawc = (const unsigned char*) raw;
+
+	if (hex_size == 0) {
+		/* No room even for the terminating NUL. */
+		return(0);
+	}
+
+	if (hex_size <= 2 * raw_size) {
+		/* Not everything fits: the output is cut to hex_size bytes. */
+		read_bytes = hex_size / 2;
+		write_bytes = hex_size;
+	} else {
+		/* Everything fits: 2 digits per raw byte plus the NUL. */
+		read_bytes = raw_size;
+		write_bytes = 2 * raw_size + 1;
+	}
+	/* Emits 2 hex chars per raw byte; ASSIGN stores them at hex. */
+#define LOOP_READ_BYTES(ASSIGN)			\
+	for (i = 0; i < read_bytes; i++) {	\
+		ASSIGN;				\
+		hex += 2;			\
+		rawc++;				\
+	}
+
+	if (ut_align_offset(hex, 2) == 0) {
+		/* hex is 2-byte aligned: store both digits in one write. */
+		LOOP_READ_BYTES(
+			*(uint16*) hex = hex_map[*rawc]
+		);
+	} else {
+		/* Unaligned: store the two digits one byte at a time. */
+		LOOP_READ_BYTES(
+			*hex       = UINT16_GET_A(hex_map[*rawc]);
+			*(hex + 1) = UINT16_GET_B(hex_map[*rawc])
+		);
+	}
+
+	if (hex_size <= 2 * raw_size && hex_size % 2 == 0) {
+		/* The digits filled the buffer; the last one yields to NUL. */
+		hex--;
+	}
+
+	*hex = '\0';
+
+	return(write_bytes);
+}
+
+/*******************************************************************//**
+Wraps "str" in single quotes, doubling any embedded single quote or
+backslash and writing an embedded NUL byte as the two characters \0.
+Returns the number of bytes written to "buf" (including the terminating
+NUL). If buf_size is too small, trailing bytes of "str" are discarded.
+@return	number of bytes that were written */
+UNIV_INLINE
+ulint
+ut_str_sql_format(
+/*==============*/
+	const char*	str,		/*!< in: string */
+	ulint		str_len,	/*!< in: string length in bytes */
+	char*		buf,		/*!< out: output buffer */
+	ulint		buf_size)	/*!< in: output buffer size
+					in bytes */
+{
+	ulint	str_i;
+	ulint	buf_i;
+
+	buf_i = 0;
+	/* Buffers of fewer than 4 bytes cannot hold a quoted character. */
+	switch (buf_size) {
+	case 3:
+		/* Exactly enough room for an empty quoted string. */
+		if (str_len == 0) {
+
+			buf[buf_i] = '\'';
+			buf_i++;
+			buf[buf_i] = '\'';
+			buf_i++;
+		}
+		/* FALLTHROUGH */
+	case 2:
+	case 1:
+		/* Terminate whatever was written so far (possibly nothing). */
+		buf[buf_i] = '\0';
+		buf_i++;
+		/* FALLTHROUGH */
+	case 0:
+		/* With buf_size == 0 nothing was written; buf_i is still 0. */
+		return(buf_i);
+	}
+
+	/* buf_size >= 4 */
+
+	buf[0] = '\'';
+	buf_i = 1;
+
+	for (str_i = 0; str_i < str_len; str_i++) {
+
+		char	ch;
+		/* Keep the last 2 bytes for the closing quote and the NUL. */
+		if (buf_size - buf_i == 2) {
+
+			break;
+		}
+
+		ch = str[str_i];
+
+		switch (ch) {
+		case '\0':
+			/* The escape takes 2 bytes, 2 more stay reserved. */
+			if (buf_size - buf_i < 4) {
+
+				goto func_exit;
+			}
+			buf[buf_i] = '\\';
+			buf_i++;
+			buf[buf_i] = '0';
+			buf_i++;
+			break;
+		case '\'':
+		case '\\':
+			/* The doubled char takes 2 bytes, 2 stay reserved. */
+			if (buf_size - buf_i < 4) {
+
+				goto func_exit;
+			}
+			buf[buf_i] = ch;
+			buf_i++;
+			/* FALLTHROUGH */
+		default:
+			/* Plain chars, and the second copy of a doubled one. */
+			buf[buf_i] = ch;
+			buf_i++;
+		}
+	}
+
+func_exit:
+	/* Close the quoted string and NUL-terminate it. */
+	buf[buf_i] = '\'';
+	buf_i++;
+	buf[buf_i] = '\0';
+	buf_i++;
+
+	return(buf_i);
+}
diff --git a/storage/innobase/include/ut0rbt.h b/storage/innobase/include/ut0rbt.h
new file mode 100644
index 00000000000..e0593e99bde
--- /dev/null
+++ b/storage/innobase/include/ut0rbt.h
@@ -0,0 +1,324 @@
+/***************************************************************************//**
+
+Copyright (c) 2007, 2010, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+/******************************************************************//**
+@file include/ut0rbt.h
+Various utilities
+
+Created 2007-03-20 Sunny Bains
+*******************************************************/
+
+#ifndef INNOBASE_UT0RBT_H
+#define INNOBASE_UT0RBT_H
+
+#if !defined(IB_RBT_TESTING)
+#include "univ.i"
+#include "ut0mem.h"
+#else
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#define	ut_malloc	malloc
+#define	ut_free		free
+#define	ulint		unsigned long
+#define	ut_a(c)		assert(c)
+#define ut_error	assert(0)
+#define	ibool		unsigned int
+#define	TRUE		1
+#define	FALSE		0
+#endif
+
+struct ib_rbt_node_t;
+typedef void (*ib_rbt_print_node)(const ib_rbt_node_t* node);
+typedef int (*ib_rbt_compare)(const void* p1, const void* p2);
+typedef int (*ib_rbt_arg_compare)(const void*, const void* p1, const void* p2);
+
+/** Red black tree color types, stored in ib_rbt_node_t::color */
+enum ib_rbt_color_t {
+	IB_RBT_RED,				/*!< node is red */
+	IB_RBT_BLACK				/*!< node is black */
+};
+
+/** Red black tree node */
+struct ib_rbt_node_t {
+	ib_rbt_color_t	color;			/*!< color of this node */
+
+	ib_rbt_node_t*	left;			/*!< points to the left child */
+	ib_rbt_node_t*	right;			/*!< points to the right child */
+	ib_rbt_node_t*	parent;			/*!< points to the parent node */
+
+	char		value[1];		/*!< start of the data value */
+};
+
+/** Red black tree instance. */
+struct	ib_rbt_t {
+	ib_rbt_node_t*	nil;			/*!< Black colored node that is
+						used as a sentinel. This is
+						pre-allocated too. */
+
+	ib_rbt_node_t*	root;			/*!< Root of the tree, this is
+						pre-allocated and the first
+						data node is the left child. */
+
+	ulint		n_nodes;		/*!< Total number of data nodes */
+
+	ib_rbt_compare	compare;		/*!< Fn. to use for comparison */
+	ib_rbt_arg_compare
+			compare_with_arg;	/*!< Fn. to use for comparison
+						with argument */
+	ulint		sizeof_value;		/*!< Sizeof the item in bytes */
+	void*		cmp_arg;		/*!< Compare func argument */
+};
+
+/** The result of searching for a key in the tree. This is useful for
+a speedy lookup followed by an insert if the key doesn't exist. */
+struct ib_rbt_bound_t {
+	const ib_rbt_node_t*
+			last;			/*!< Last node visited */
+
+	int		result;			/*!< Result of comparing with
+						the last non-nil node that
+						was visited */
+};
+
+/** Size in elements (t is an rb tree instance; may be any expression) */
+#define rbt_size(t)	((t)->n_nodes)
+
+/** Check whether the rb tree is empty (t is an rb tree instance) */
+#define rbt_empty(t)	(rbt_size(t) == 0)
+
+/** Get data value (t is the data type, n is an rb tree node instance) */
+#define rbt_value(t, n) ((t*) &(n)->value[0])
+
+/** Compare a key with the node value (t is tree, k is key, n is node) */
+#define rbt_compare(t, k, n) ((t)->compare((k), (n)->value))
+
+/**********************************************************************//**
+Free an instance of  a red black tree */
+UNIV_INTERN
+void
+rbt_free(
+/*=====*/
+	ib_rbt_t*	tree);			/*!< in: rb tree to free */
+/**********************************************************************//**
+Create an instance of a red black tree
+@return	rb tree instance */
+UNIV_INTERN
+ib_rbt_t*
+rbt_create(
+/*=======*/
+	size_t		sizeof_value,		/*!< in: size in bytes */
+	ib_rbt_compare	compare);		/*!< in: comparator */
+/**********************************************************************//**
+Create an instance of a red black tree, whose comparison function takes
+an argument
+@return	rb tree instance */
+UNIV_INTERN
+ib_rbt_t*
+rbt_create_arg_cmp(
+/*===============*/
+	size_t		sizeof_value,		/*!< in: size in bytes */
+	ib_rbt_arg_compare
+			compare,		/*!< in: comparator */
+	void*		cmp_arg);		/*!< in: compare fn arg */
+/**********************************************************************//**
+Delete a node from the red black tree, identified by key */
+UNIV_INTERN
+ibool
+rbt_delete(
+/*=======*/
+						/* out: TRUE on success */
+	ib_rbt_t*	tree,			/* in: rb tree */
+	const void*	key);			/* in: key to delete */
+/**********************************************************************//**
+Remove a node from the red black tree, NOTE: This function will not delete
+the node instance, THAT IS THE CALLERS RESPONSIBILITY.
+@return	the removed node, with the const qualifier cast away */
+UNIV_INTERN
+ib_rbt_node_t*
+rbt_remove_node(
+/*============*/
+	ib_rbt_t*	tree,			/*!< in: rb tree */
+	const ib_rbt_node_t*
+			node);			/*!< in: node to delete, this
+						is a fudge and declared const
+						because the caller has access
+						only to const nodes.*/
+/**********************************************************************//**
+Return a node from the red black tree, identified by
+key, NULL if not found
+@return	node if found else return NULL */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_lookup(
+/*=======*/
+	const ib_rbt_t*	tree,			/*!< in: rb tree to search */
+	const void*	key);			/*!< in: key to lookup */
+/**********************************************************************//**
+Add data to the red black tree, identified by key (no dups yet!)
+@return	inserted node */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_insert(
+/*=======*/
+	ib_rbt_t*	tree,			/*!< in: rb tree */
+	const void*	key,			/*!< in: key for ordering */
+	const void*	value);			/*!< in: data that will be
+						copied to the node.*/
+/**********************************************************************//**
+Add a new node to the tree, useful for data that is pre-sorted.
+@return	appended node */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_add_node(
+/*=========*/
+	ib_rbt_t*	tree,			/*!< in: rb tree */
+	ib_rbt_bound_t*	parent,			/*!< in: parent */
+	const void*	value);			/*!< in: this value is copied
+						to the node */
+/**********************************************************************//**
+Return the left most data node in the tree
+@return	left most node */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_first(
+/*======*/
+	const ib_rbt_t*	tree);			/*!< in: rb tree */
+/**********************************************************************//**
+Return the right most data node in the tree
+@return	right most node */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_last(
+/*=====*/
+	const ib_rbt_t*	tree);			/*!< in: rb tree */
+/**********************************************************************//**
+Return the next node from current.
+@return	successor node to current that is passed in. */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_next(
+/*=====*/
+	const ib_rbt_t*	tree,			/*!< in: rb tree */
+	const ib_rbt_node_t*			/* in: current node */
+			current);
+/**********************************************************************//**
+Return the prev node from current.
+@return	predecessor node to current that is passed in */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_prev(
+/*=====*/
+	const ib_rbt_t*	tree,			/*!< in: rb tree */
+	const ib_rbt_node_t*			/* in: current node */
+			current);
+/**********************************************************************//**
+Find the node that has the lowest key that is >= key.
+@return	node that satisfies the lower bound constraint or NULL */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_lower_bound(
+/*============*/
+	const ib_rbt_t*	tree,			/*!< in: rb tree */
+	const void*	key);			/*!< in: key to search */
+/**********************************************************************//**
+Find the node that has the greatest key that is <= key.
+@return	node that satisfies the upper bound constraint or NULL */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_upper_bound(
+/*============*/
+	const ib_rbt_t*	tree,			/*!< in: rb tree */
+	const void*	key);			/*!< in: key to search */
+/**********************************************************************//**
+Search for the key, a node will be returned in parent.last, whether it
+was found or not. If not found then parent.last will contain the
+parent node for the possibly new key otherwise the matching node.
+@return	result of last comparison */
+UNIV_INTERN
+int
+rbt_search(
+/*=======*/
+	const ib_rbt_t*	tree,			/*!< in: rb tree */
+	ib_rbt_bound_t*	parent,			/*!< in: search bounds */
+	const void*	key);			/*!< in: key to search */
+/**********************************************************************//**
+Search for the key, a node will be returned in parent.last, whether it
+was found or not. If not found then parent.last will contain the
+parent node for the possibly new key otherwise the matching node.
+@return	result of last comparison */
+UNIV_INTERN
+int
+rbt_search_cmp(
+/*===========*/
+	const ib_rbt_t*	tree,			/*!< in: rb tree */
+	ib_rbt_bound_t*	parent,			/*!< in: search bounds */
+	const void*	key,			/*!< in: key to search */
+	ib_rbt_compare	compare,		/*!< in: comparator */
+	ib_rbt_arg_compare
+			arg_compare);		/*!< in: fn to compare items
+						with argument */
+/**********************************************************************//**
+Clear the tree, deletes (and free's) all the nodes. */
+UNIV_INTERN
+void
+rbt_clear(
+/*======*/
+	ib_rbt_t*	tree);			/*!< in: rb tree */
+/**********************************************************************//**
+Merge the nodes from src into dst. Return the number of nodes merged.
+@return	no. of recs merged */
+UNIV_INTERN
+ulint
+rbt_merge_uniq(
+/*===========*/
+	ib_rbt_t*	dst,			/*!< in: dst rb tree */
+	const ib_rbt_t*	src);			/*!< in: src rb tree */
+/**********************************************************************//**
+Merge the nodes from src into dst. Return the number of nodes merged.
+Delete the nodes from src after copying node to dst. As a side effect
+the duplicates will be left untouched in the src, since we don't support
+duplicates (yet). NOTE: src and dst must be similar, the function doesn't
+check for this condition (yet).
+@return	no. of recs merged */
+UNIV_INTERN
+ulint
+rbt_merge_uniq_destructive(
+/*=======================*/
+	ib_rbt_t*	dst,			/*!< in: dst rb tree */
+	ib_rbt_t*	src);			/*!< in: src rb tree */
+/**********************************************************************//**
+Verify the integrity of the RB tree. For debugging. 0 failure else height
+of tree (in count of black nodes).
+@return	TRUE if OK FALSE if tree invalid. */
+UNIV_INTERN
+ibool
+rbt_validate(
+/*=========*/
+	const ib_rbt_t*	tree);			/*!< in: tree to validate */
+/**********************************************************************//**
+Iterate over the tree in depth first order. */
+UNIV_INTERN
+void
+rbt_print(
+/*======*/
+	const ib_rbt_t*		tree,		/*!< in: tree to traverse */
+	ib_rbt_print_node	print);		/*!< in: print function */
+
+#endif /* INNOBASE_UT0RBT_H */
diff --git a/storage/innobase/include/ut0rnd.h b/storage/innobase/include/ut0rnd.h
new file mode 100644
index 00000000000..53b769849a5
--- /dev/null
+++ b/storage/innobase/include/ut0rnd.h
@@ -0,0 +1,148 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/ut0rnd.h
+Random numbers and hashing
+
+Created 1/20/1994 Heikki Tuuri
+***********************************************************************/
+
+#ifndef ut0rnd_h
+#define ut0rnd_h
+
+#include "univ.i"
+
+#ifndef UNIV_INNOCHECKSUM
+
+#include "ut0byte.h"
+
+/** The 'character code' for end of field or string (used
+in folding records) */
+#define UT_END_OF_FIELD		257
+
+/********************************************************//**
+This is used to set the random number seed. */
+UNIV_INLINE
+void
+ut_rnd_set_seed(
+/*============*/
+	ulint	 seed);		 /*!< in: seed */
+/********************************************************//**
+The following function generates a series of 'random' ulint integers.
+@return	the next 'random' number */
+UNIV_INLINE
+ulint
+ut_rnd_gen_next_ulint(
+/*==================*/
+	ulint	rnd);	/*!< in: the previous random number value */
+/*********************************************************//**
+The following function generates 'random' ulint integers which
+enumerate the value space (let there be N of them) of ulint integers
+in a pseudo-random fashion. Note that the same integer is repeated
+always after N calls to the generator.
+@return	the 'random' number */
+UNIV_INLINE
+ulint
+ut_rnd_gen_ulint(void);
+/*==================*/
+/********************************************************//**
+Generates a random integer from a given interval.
+@return	the 'random' number */
+UNIV_INLINE
+ulint
+ut_rnd_interval(
+/*============*/
+	ulint	low,	/*!< in: low limit; can generate also this value */
+	ulint	high);	/*!< in: high limit; can generate also this value */
+/*********************************************************//**
+Generates a random ibool value.
+@return	the random value */
+UNIV_INLINE
+ibool
+ut_rnd_gen_ibool(void);
+/*=================*/
+/*******************************************************//**
+The following function generates a hash value for a ulint integer
+to a hash table of size table_size, which should be a prime or some
+random number to work reliably.
+@return	hash value */
+UNIV_INLINE
+ulint
+ut_hash_ulint(
+/*==========*/
+	ulint	 key,		/*!< in: value to be hashed */
+	ulint	 table_size);	/*!< in: hash table size */
+/*************************************************************//**
+Folds a 64-bit integer.
+@return	folded value */
+UNIV_INLINE
+ulint
+ut_fold_ull(
+/*========*/
+	ib_uint64_t	d)	/*!< in: 64-bit integer */
+	__attribute__((const));
+/*************************************************************//**
+Folds a character string ending in the null character.
+@return	folded value */
+UNIV_INLINE
+ulint
+ut_fold_string(
+/*===========*/
+	const char*	str)	/*!< in: null-terminated string */
+	__attribute__((pure));
+/***********************************************************//**
+Looks for a prime number slightly greater than the given argument.
+The prime is chosen so that it is not near any power of 2.
+@return	prime */
+UNIV_INTERN
+ulint
+ut_find_prime(
+/*==========*/
+	ulint	n)	/*!< in: positive number > 100 */
+	__attribute__((const));
+
+#endif /* !UNIV_INNOCHECKSUM */
+
+/*************************************************************//**
+Folds a pair of ulints.
+@return	folded value */
+UNIV_INLINE
+ulint
+ut_fold_ulint_pair(
+/*===============*/
+	ulint	n1,	/*!< in: ulint */
+	ulint	n2)	/*!< in: ulint */
+	__attribute__((const));
+/*************************************************************//**
+Folds a binary string.
+@return	folded value */
+UNIV_INLINE
+ulint
+ut_fold_binary(
+/*===========*/
+	const byte*	str,	/*!< in: string of bytes */
+	ulint		len)	/*!< in: length */
+	__attribute__((pure));
+
+
+#ifndef UNIV_NONINL
+#include "ut0rnd.ic"
+#endif
+
+#endif
diff --git a/storage/innobase/include/ut0rnd.ic b/storage/innobase/include/ut0rnd.ic
new file mode 100644
index 00000000000..024c59e553b
--- /dev/null
+++ b/storage/innobase/include/ut0rnd.ic
@@ -0,0 +1,255 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************************//**
+@file include/ut0rnd.ic
+Random numbers and hashing
+
+Created 5/30/1994 Heikki Tuuri
+*******************************************************************/
+
+/* Constants mixed into the fold and hash functions below.  They are
+defined outside the UNIV_INNOCHECKSUM guard because ut_fold_ulint_pair(),
+which uses both of them, is also defined outside that guard. */
+#define UT_HASH_RANDOM_MASK	1463735687
+#define UT_HASH_RANDOM_MASK2	1653893711
+
+#ifndef UNIV_INNOCHECKSUM
+
+/* Constants used by the pseudo-random number generator functions
+ut_rnd_gen_ulint() and ut_rnd_gen_next_ulint() below.  UT_RND4 is not
+referenced in this file. */
+#define UT_RND1			151117737
+#define UT_RND2			119785373
+#define UT_RND3			 85689495
+#define UT_RND4			 76595339
+#define UT_SUM_RND2		 98781234
+#define UT_SUM_RND3		126792457
+#define UT_SUM_RND4		 63498502
+#define UT_XOR_RND1		187678878
+#define UT_XOR_RND2		143537923
+
+/** Seed value of ut_rnd_gen_ulint(); overwritten by ut_rnd_set_seed() and
+advanced by every call to ut_rnd_gen_ulint() */
+extern	ulint	 ut_rnd_ulint_counter;
+
+/********************************************************//**
+This is used to set the random number seed.
+The seed is stored in the global ut_rnd_ulint_counter, which is the
+state that ut_rnd_gen_ulint() advances on every call. */
+UNIV_INLINE
+void
+ut_rnd_set_seed(
+/*============*/
+	ulint	 seed)		 /*!< in: seed */
+{
+	ut_rnd_ulint_counter = seed;
+}
+
+/********************************************************//**
+Scrambles the previous value of a 'random' series into the next one.
+The value goes through three multiply-and-add steps; between two such
+steps it is XORed with a constant and rotated left by 20 bits.
+@return	the next 'random' number */
+UNIV_INLINE
+ulint
+ut_rnd_gen_next_ulint(
+/*==================*/
+	ulint	rnd)	/*!< in: the previous random number value */
+{
+	/* Right-shift distance that complements a left shift by 20, so
+	that the two shifts together rotate a ulint left by 20 bits */
+	const ulint	rshift = 8 * sizeof(ulint) - 20;
+
+	/* Round 1 */
+	rnd *= UT_RND2;
+	rnd += UT_SUM_RND3;
+	rnd ^= UT_XOR_RND1;
+	rnd = (rnd << 20) + (rnd >> rshift);
+
+	/* Round 2 */
+	rnd *= UT_RND3;
+	rnd += UT_SUM_RND4;
+	rnd ^= UT_XOR_RND2;
+	rnd = (rnd << 20) + (rnd >> rshift);
+
+	/* Final multiply-and-add */
+	return(rnd * UT_RND1 + UT_SUM_RND2);
+}
+
+/********************************************************//**
+Generates the next 'random' ulint. The global counter
+ut_rnd_ulint_counter is first advanced by a multiply-and-add step and
+the new counter value is then scrambled with ut_rnd_gen_next_ulint().
+The values enumerate the value space of ulint integers in a pseudo
+random fashion. Note that the same integer is repeated always after
+2 to power 32 calls to the generator (if ulint is 32-bit).
+@return	the 'random' number */
+UNIV_INLINE
+ulint
+ut_rnd_gen_ulint(void)
+/*==================*/
+{
+	/* Advance the shared generator state */
+	ut_rnd_ulint_counter = UT_RND1 * ut_rnd_ulint_counter + UT_RND2;
+
+	/* Hand out a scrambled copy of the state, not the state itself */
+	return(ut_rnd_gen_next_ulint(ut_rnd_ulint_counter));
+}
+
+/********************************************************//**
+Generates a random integer from a given interval.
+For low < high the result is low + (rnd % (high - low)), that is, a
+value in [low, high - 1]; high itself is returned only when low == high.
+@return	the 'random' number */
+UNIV_INLINE
+ulint
+ut_rnd_interval(
+/*============*/
+	ulint	low,	/*!< in: low limit; can generate also this value */
+	ulint	high)	/*!< in: high limit; returned only if high == low,
+			otherwise the largest possible result is high - 1 */
+{
+	ulint	rnd;
+
+	ut_ad(high >= low);
+
+	if (low == high) {
+
+		/* Also avoids a modulo by zero below */
+		return(low);
+	}
+
+	rnd = ut_rnd_gen_ulint();
+
+	/* NOTE(review): the modulus is high - low, not high - low + 1, so
+	the upper limit is exclusive here */
+	return(low + (rnd % (high - low)));
+}
+
+/*********************************************************//**
+Generates a random iboolean value. The result is the lowest bit of
+(x >> 20) + (x >> 15), where x is a fresh value from ut_rnd_gen_ulint();
+that bit equals bit 20 of x XOR bit 15 of x.
+@return	the random value */
+UNIV_INLINE
+ibool
+ut_rnd_gen_ibool(void)
+/*=================*/
+{
+	const ulint	x = ut_rnd_gen_ulint();
+	const ulint	mixed = (x >> 20) + (x >> 15);
+
+	return((mixed & 1) ? TRUE : FALSE);
+}
+
+/*******************************************************//**
+Maps a ulint key to a cell number of a hash table: the key is XORed
+with UT_HASH_RANDOM_MASK2 and reduced modulo table_size. table_size
+should be a prime or some random number for the hash table to work
+reliably, and it must not be zero.
+@return	hash value, in the range 0 .. table_size - 1 */
+UNIV_INLINE
+ulint
+ut_hash_ulint(
+/*==========*/
+	ulint	 key,		/*!< in: value to be hashed */
+	ulint	 table_size)	/*!< in: hash table size */
+{
+	ut_ad(table_size);
+
+	return((key ^ UT_HASH_RANDOM_MASK2) % table_size);
+}
+
+/*************************************************************//**
+Folds a 64-bit integer by splitting it into its low and high 32-bit
+halves and folding the two halves with ut_fold_ulint_pair().
+@return	folded value */
+UNIV_INLINE
+ulint
+ut_fold_ull(
+/*========*/
+	ib_uint64_t	d)	/*!< in: 64-bit integer */
+{
+	const ulint	low32 = (ulint) d & ULINT32_MASK;
+	const ulint	high32 = (ulint) (d >> 32);
+
+	return(ut_fold_ulint_pair(low32, high32));
+}
+
+/*************************************************************//**
+Folds a character string ending in the null character. Starting from
+zero, every character before the terminator is combined into the
+running value with ut_fold_ulint_pair(); the terminator itself is not
+folded, so an empty string folds to 0.
+NOTE(review): each character is converted from plain char to ulint, so
+on platforms where char is signed, bytes >= 0x80 are sign-extended.
+@return	folded value */
+UNIV_INLINE
+ulint
+ut_fold_string(
+/*===========*/
+	const char*	str)	/*!< in: null-terminated string */
+{
+	ulint		fold = 0;
+	const char*	p;
+
+	ut_ad(str);
+
+	for (p = str; *p != '\0'; p++) {
+		fold = ut_fold_ulint_pair(fold, (ulint)(*p));
+	}
+
+	return(fold);
+}
+
+#endif /* !UNIV_INNOCHECKSUM */
+
+/*************************************************************//**
+Folds a pair of ulints into one ulint:
+((((n1 ^ n2 ^ UT_HASH_RANDOM_MASK2) << 8) + n1) ^ UT_HASH_RANDOM_MASK) + n2.
+All arithmetic is unsigned, so overflow wraps around.
+@return	folded value */
+UNIV_INLINE
+ulint
+ut_fold_ulint_pair(
+/*===============*/
+	ulint	n1,	/*!< in: ulint */
+	ulint	n2)	/*!< in: ulint */
+{
+	ulint	fold;
+
+	fold = n1 ^ n2 ^ UT_HASH_RANDOM_MASK2;
+	fold = (fold << 8) + n1;
+	fold ^= UT_HASH_RANDOM_MASK;
+
+	return(fold + n2);
+}
+
+/*************************************************************//**
+Folds a binary string.
+The bytes are folded in order, one at a time, with ut_fold_ulint_pair().
+The loop is unrolled to consume 8 bytes per iteration and the remaining
+0..7 bytes are handled by the switch that follows it. A zero length
+folds to 0.
+@return	folded value */
+UNIV_INLINE
+ulint
+ut_fold_binary(
+/*===========*/
+	const byte*	str,	/*!< in: string of bytes */
+	ulint		len)	/*!< in: length */
+{
+	ulint		fold = 0;
+	/* End of the part that is consumed 8 bytes per iteration. The
+	mask is built from a ulint so that no high-order bits of len are
+	lost when ulint is wider than 32 bits; for every len below 2^32
+	the result is the same as with the mask 0xFFFFFFF8. */
+	const byte*	str_end	= str + (len & ~(ulint) 7);
+
+	ut_ad(str || !len);
+
+	while (str < str_end) {
+		fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
+		fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
+		fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
+		fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
+		fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
+		fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
+		fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
+		fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
+	}
+
+	/* Fold the remaining len % 8 bytes: entering at case k folds
+	exactly k bytes because every case falls through to the next. */
+	switch (len & 0x7) {
+	case 7:
+		fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
+		/* fall through */
+	case 6:
+		fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
+		/* fall through */
+	case 5:
+		fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
+		/* fall through */
+	case 4:
+		fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
+		/* fall through */
+	case 3:
+		fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
+		/* fall through */
+	case 2:
+		fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
+		/* fall through */
+	case 1:
+		fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
+	}
+
+	return(fold);
+}
diff --git a/storage/innobase/include/ut0sort.h b/storage/innobase/include/ut0sort.h
new file mode 100644
index 00000000000..75648b5c317
--- /dev/null
+++ b/storage/innobase/include/ut0sort.h
@@ -0,0 +1,106 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/ut0sort.h
+Sort utility
+
+Created 11/9/1995 Heikki Tuuri
+***********************************************************************/
+
+#ifndef ut0sort_h
+#define ut0sort_h
+
+#include "univ.i"
+
+/* This module gives a macro definition of the body of
+a standard sort function for an array of elements of any
+type. The comparison function is given as a parameter to
+the macro. The sort algorithm is mergesort, whose worst-case
+running time is O(n log n).
+*/
+
+/*******************************************************************//**
+This macro expands to the body of a standard sort function.
+The sort function uses mergesort and must be defined separately
+for each type of array.
+Also the comparison function has to be defined individually
+for each array cell type. SORT_FUN is the sort function name.
+The function takes the array to be sorted (ARR),
+the array of auxiliary space (AUX_ARR) of same size,
+and the low (LOW), inclusive, and high (HIGH), noninclusive,
+limits for the sort interval as arguments.
+CMP_FUN is the comparison function name. It takes as arguments
+two elements from the array and returns 1, if the first is bigger,
+0 if equal, and -1 if the second bigger.
+
+Notes on the expansion:
+- It contains plain 'return;' statements and calls
+  SORT_FUN(ARR, AUX_ARR, low, high) recursively, so it has to be the
+  body of a function named SORT_FUN that returns void and takes these
+  four arguments in this order.
+- The interval must contain at least one element: LOW < HIGH is
+  checked with ut_ad().
+- Only 'CMP_FUN(...) > 0' is tested. On a tie the element from the lower
+  half is taken first, so elements that compare equal keep their
+  relative order.
+- Elements are moved by assignment and, when the merged interval is
+  copied back from AUX_ARR to ARR, by memcpy().
+- The macro arguments are expanded several times and should therefore
+  be free of side effects. */
+
+#define UT_SORT_FUNCTION_BODY(SORT_FUN, ARR, AUX_ARR, LOW, HIGH, CMP_FUN)\
+{\
+	ulint		ut_sort_mid77;\
+	ulint		ut_sort_i77;\
+	ulint		ut_sort_low77;\
+	ulint		ut_sort_high77;\
+\
+	ut_ad((LOW) < (HIGH));\
+	ut_ad(ARR);\
+	ut_ad(AUX_ARR);\
+\
+	if ((LOW) == (HIGH) - 1) {\
+		return;\
+	} else if ((LOW) == (HIGH) - 2) {\
+		if (CMP_FUN((ARR)[LOW], (ARR)[(HIGH) - 1]) > 0) {\
+			(AUX_ARR)[LOW] = (ARR)[LOW];\
+			(ARR)[LOW] = (ARR)[(HIGH) - 1];\
+			(ARR)[(HIGH) - 1] = (AUX_ARR)[LOW];\
+		}\
+		return;\
+	}\
+\
+	ut_sort_mid77 = ((LOW) + (HIGH)) / 2;\
+\
+	SORT_FUN((ARR), (AUX_ARR), (LOW), ut_sort_mid77);\
+	SORT_FUN((ARR), (AUX_ARR), ut_sort_mid77, (HIGH));\
+\
+	ut_sort_low77 = (LOW);\
+	ut_sort_high77 = ut_sort_mid77;\
+\
+	for (ut_sort_i77 = (LOW); ut_sort_i77 < (HIGH); ut_sort_i77++) {\
+\
+		if (ut_sort_low77 >= ut_sort_mid77) {\
+			(AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_high77];\
+			ut_sort_high77++;\
+		} else if (ut_sort_high77 >= (HIGH)) {\
+			(AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_low77];\
+			ut_sort_low77++;\
+		} else if (CMP_FUN((ARR)[ut_sort_low77],\
+				   (ARR)[ut_sort_high77]) > 0) {\
+			(AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_high77];\
+			ut_sort_high77++;\
+		} else {\
+			(AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_low77];\
+			ut_sort_low77++;\
+		}\
+	}\
+\
+	memcpy((void*) ((ARR) + (LOW)), (AUX_ARR) + (LOW),\
+	       ((HIGH) - (LOW)) * sizeof *(ARR));\
+}\
+
+
+#endif
+
diff --git a/storage/innobase/include/ut0ut.h b/storage/innobase/include/ut0ut.h
new file mode 100644
index 00000000000..0caf379d8fa
--- /dev/null
+++ b/storage/innobase/include/ut0ut.h
@@ -0,0 +1,497 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/ut0ut.h
+Various utilities
+
+Created 1/20/1994 Heikki Tuuri
+***********************************************************************/
+
+#ifndef ut0ut_h
+#define ut0ut_h
+
+#include "univ.i"
+
+#ifndef UNIV_INNOCHECKSUM
+
+#include "db0err.h"
+
+#ifndef UNIV_HOTBACKUP
+# include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
+#endif /* UNIV_HOTBACKUP */
+
+#include <time.h>
+#ifndef MYSQL_SERVER
+#include <ctype.h>
+#endif
+
+#include <stdarg.h> /* for va_list */
+
+/** Index name prefix in fast index creation */
+#define	TEMP_INDEX_PREFIX	'\377'
+/** Index name prefix in fast index creation, as a string constant */
+#define TEMP_INDEX_PREFIX_STR	"\377"
+
+/** Time stamp */
+typedef time_t	ib_time_t;
+
+/* Utility class for running a piece of code when a function returns or
+when a scope ends: the given function object is invoked from the
+destructor of this object. Only a reference to the function object is
+stored, so the function object has to stay alive at least as long as
+the ut_when_dtor that refers to it. */
+template<typename F>
+struct ut_when_dtor {
+	/* Remember which function object to invoke later */
+	ut_when_dtor(F& p)
+		:
+		f(p)
+	{
+	}
+
+	/* Invoke the remembered function object */
+	~ut_when_dtor()
+	{
+		f();
+	}
+
+private:
+	/* The function object called from the destructor */
+	F&	f;
+};
+
+#ifndef UNIV_HOTBACKUP
+/* UT_RELAX_CPU() expands to the first of the following variants that is
+enabled by the configuration macros: a "pause" instruction, "rep; nop",
+a dummy atomic compare-and-swap on a local variable, YieldProcessor(),
+or a no-op expression. */
+# if defined(HAVE_PAUSE_INSTRUCTION)
+   /* According to the gcc info page, asm volatile means that the
+   instruction has important side-effects and must not be removed.
+   Also asm volatile may trigger a memory barrier (spilling all registers
+   to memory). */
+#  ifdef __SUNPRO_CC
+#   define UT_RELAX_CPU() asm ("pause" )
+#  else
+#   define UT_RELAX_CPU() __asm__ __volatile__ ("pause")
+#  endif /* __SUNPRO_CC */
+
+# elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
+#  define UT_RELAX_CPU() __asm__ __volatile__ ("rep; nop")
+# elif defined(HAVE_ATOMIC_BUILTINS)
+   /* NOTE(review): volatile_var has no initializer, so the
+   compare-and-swap below compares an indeterminate value with 0; the
+   outcome of the swap is not used. */
+#  define UT_RELAX_CPU() do { \
+     volatile lint	volatile_var; \
+     os_compare_and_swap_lint(&volatile_var, 0, 1); \
+   } while (0)
+# elif defined(HAVE_WINDOWS_ATOMICS)
+   /* In the Win32 API, the x86 PAUSE instruction is executed by calling
+   the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
+   independent way by using YieldProcessor. */
+#  define UT_RELAX_CPU() YieldProcessor()
+# else
+#  define UT_RELAX_CPU() ((void)0) /* avoid warning for an empty statement */
+# endif
+
+/*********************************************************************//**
+Waits until cond becomes true or until max_wait_us microseconds have
+elapsed since the start of the wait, whichever comes first. Between
+two checks the calling thread sleeps for 2 ms. Both arguments are
+re-evaluated on every check, so they should be free of side effects.
+@param cond		in: condition to wait for; evaluated every 2 ms
+@param max_wait_us	in: maximum delay to wait, in microseconds */
+#define UT_WAIT_FOR(cond, max_wait_us)				\
+do {								\
+	const ullint	start_us = ut_time_us(NULL);		\
+								\
+	for (;;) {						\
+		if (cond) {					\
+			break;					\
+		}						\
+		if (!(ut_time_us(NULL) - start_us		\
+		      < (max_wait_us))) {			\
+			break;					\
+		}						\
+		os_thread_sleep(2000 /* 2 ms */);		\
+	}							\
+} while (0)
+#endif /* !UNIV_HOTBACKUP */
+
+/** Returns the smaller of two values of the same type, compared with
+operator<. When neither value is smaller than the other, b is returned.
+@return	a if a < b, otherwise b */
+template <class T>
+T
+ut_min(T a, T b)
+{
+	if (a < b) {
+		return(a);
+	}
+
+	return(b);
+}
+/** Returns the bigger of two values of the same type, compared with
+operator>. When neither value is bigger than the other, b is returned.
+@return	a if a > b, otherwise b */
+template <class T>
+T
+ut_max(T a, T b)
+{
+	if (a > b) {
+		return(a);
+	}
+
+	return(b);
+}
+
+/******************************************************//**
+Calculates the minimum of two ulints.
+@return	minimum */
+UNIV_INLINE
+ulint
+ut_min(
+/*===*/
+	ulint	 n1,	/*!< in: first number */
+	ulint	 n2);	/*!< in: second number */
+/******************************************************//**
+Calculates the maximum of two ulints.
+@return	maximum */
+UNIV_INLINE
+ulint
+ut_max(
+/*===*/
+	ulint	 n1,	/*!< in: first number */
+	ulint	 n2);	/*!< in: second number */
+/****************************************************************//**
+Calculates minimum of two ulint-pairs. */
+UNIV_INLINE
+void
+ut_pair_min(
+/*========*/
+	ulint*	a,	/*!< out: more significant part of minimum */
+	ulint*	b,	/*!< out: less significant part of minimum */
+	ulint	a1,	/*!< in: more significant part of first pair */
+	ulint	b1,	/*!< in: less significant part of first pair */
+	ulint	a2,	/*!< in: more significant part of second pair */
+	ulint	b2);	/*!< in: less significant part of second pair */
+/******************************************************//**
+Compares two ulints.
+@return	1 if a > b, 0 if a == b, -1 if a < b */
+UNIV_INLINE
+int
+ut_ulint_cmp(
+/*=========*/
+	ulint	a,	/*!< in: ulint */
+	ulint	b);	/*!< in: ulint */
+/*******************************************************//**
+Compares two pairs of ulints.
+@return	-1 if a < b, 0 if a == b, 1 if a > b */
+UNIV_INLINE
+int
+ut_pair_cmp(
+/*========*/
+	ulint	a1,	/*!< in: more significant part of first pair */
+	ulint	a2,	/*!< in: less significant part of first pair */
+	ulint	b1,	/*!< in: more significant part of second pair */
+	ulint	b2);	/*!< in: less significant part of second pair */
+/*************************************************************//**
+Determines if a number is zero or a power of two.
+The test is !(n & (n - 1)). The argument is expanded twice, so it
+should be free of side effects.
+@param n	in: number
+@return		nonzero if n is zero or a power of two; zero otherwise */
+#define ut_is_2pow(n) UNIV_LIKELY(!((n) & ((n) - 1)))
+/*************************************************************//**
+Calculates fast the remainder of n/m when m is a power of two.
+Computed as n & (m - 1); the macro does not check that m is a power
+of two.
+@param n	in: numerator
+@param m	in: denominator, must be a power of two
+@return		the remainder of n/m */
+#define ut_2pow_remainder(n, m) ((n) & ((m) - 1))
+/*************************************************************//**
+Calculates the biggest multiple of m that is not bigger than n
+when m is a power of two.  In other words, rounds n down to m * k.
+Computed as n & ~(m - 1).
+NOTE(review): the mask ~(m - 1) is computed in the type of m; if m is
+unsigned and narrower than n, the mask is zero-extended and also clears
+the high-order bits of n. Pass m in a type at least as wide as n.
+@param n	in: number to round down
+@param m	in: alignment, must be a power of two
+@return		n rounded down to the biggest possible integer multiple of m */
+#define ut_2pow_round(n, m) ((n) & ~((m) - 1))
+/** Align a number down to a multiple of a power of two.
+This is an alias of ut_2pow_round().
+@param n	in: number to round down
+@param m	in: alignment, must be a power of two
+@return		n rounded down to the biggest possible integer multiple of m */
+#define ut_calc_align_down(n, m) ut_2pow_round(n, m)
+/********************************************************//**
+Calculates the smallest multiple of m that is not smaller than n
+when m is a power of two.  In other words, rounds n up to m * k.
+Computed as (n + (m - 1)) & ~(m - 1): m is expanded twice, and the
+addition wraps around if n + (m - 1) does not fit in the type of the
+expression.
+NOTE(review): as with ut_2pow_round(), the mask is computed in the type
+of m, so m should be at least as wide as n.
+@param n	in: number to round up
+@param m	in: alignment, must be a power of two
+@return		n rounded up to the smallest possible integer multiple of m */
+#define ut_calc_align(n, m) (((n) + ((m) - 1)) & ~((m) - 1))
+/*************************************************************//**
+Calculates fast the 2-logarithm of a number, rounded upward to an
+integer.
+@return	logarithm in the base 2, rounded upward */
+UNIV_INLINE
+ulint
+ut_2_log(
+/*=====*/
+	ulint	n);	/*!< in: number */
+/*************************************************************//**
+Calculates 2 to power n.
+@return	2 to power n */
+UNIV_INLINE
+ulint
+ut_2_exp(
+/*=====*/
+	ulint	n);	/*!< in: number */
+/*************************************************************//**
+Calculates fast the number rounded up to the nearest power of 2.
+@return	first power of 2 which is >= n */
+UNIV_INTERN
+ulint
+ut_2_power_up(
+/*==========*/
+	ulint	n)	/*!< in: number != 0 */
+	__attribute__((const));
+
+/** Determine how many bytes (groups of 8 bits) are needed to
+store the given number of bits.
+Computed as (b + 7) / 8, that is, b / 8 rounded up.
+@param b	in: bits
+@return		number of bytes (octets) needed to represent b */
+#define UT_BITS_IN_BYTES(b) (((b) + 7) / 8)
+
+/**********************************************************//**
+Returns system time. We do not specify the format of the time returned:
+the only way to manipulate it is to use the function ut_difftime.
+@return	system time */
+UNIV_INTERN
+ib_time_t
+ut_time(void);
+/*=========*/
+#ifndef UNIV_HOTBACKUP
+/**********************************************************//**
+Returns system time.
+Upon successful completion, the value 0 is returned; otherwise the
+value -1 is returned and the global variable errno is set to indicate the
+error.
+@return	0 on success, -1 otherwise */
+UNIV_INTERN
+int
+ut_usectime(
+/*========*/
+	ulint*	sec,	/*!< out: seconds since the Epoch */
+	ulint*	ms);	/*!< out: microseconds since the Epoch+*sec */
+
+/**********************************************************//**
+Returns the number of microseconds since epoch. Similar to
+time(3), the return value is also stored in *tloc, provided
+that tloc is non-NULL.
+@return	us since epoch */
+UNIV_INTERN
+ullint
+ut_time_us(
+/*=======*/
+	ullint*	tloc);	/*!< out: us since epoch, if non-NULL */
+#endif /* !UNIV_HOTBACKUP */
+
+/**********************************************************//**
+Returns the number of milliseconds since some epoch.  The
+value may wrap around.  It should only be used for heuristic
+purposes.  The function is declared once, regardless of whether
+UNIV_HOTBACKUP is defined.
+@return	ms since epoch */
+UNIV_INTERN
+ulint
+ut_time_ms(void);
+/*============*/
+
+/**********************************************************//**
+Returns the difference of two times in seconds.
+@return	time2 - time1 expressed in seconds */
+UNIV_INTERN
+double
+ut_difftime(
+/*========*/
+	ib_time_t	time2,	/*!< in: time */
+	ib_time_t	time1);	/*!< in: time */
+
+#endif /* !UNIV_INNOCHECKSUM */
+
+/**********************************************************//**
+Prints a timestamp to a file. */
+UNIV_INTERN
+void
+ut_print_timestamp(
+/*===============*/
+	FILE*	file)	/*!< in: file where to print */
+	UNIV_COLD __attribute__((nonnull));
+
+#ifndef UNIV_INNOCHECKSUM
+
+/**********************************************************//**
+Sprintfs a timestamp to a buffer, 13..14 chars plus terminating NUL. */
+UNIV_INTERN
+void
+ut_sprintf_timestamp(
+/*=================*/
+	char*	buf); /*!< in: buffer where to sprintf */
+#ifdef UNIV_HOTBACKUP
+/**********************************************************//**
+Sprintfs a timestamp to a buffer with no spaces and with ':' characters
+replaced by '_'. */
+UNIV_INTERN
+void
+ut_sprintf_timestamp_without_extra_chars(
+/*=====================================*/
+	char*	buf); /*!< in: buffer where to sprintf */
+/**********************************************************//**
+Returns current year, month, day. */
+UNIV_INTERN
+void
+ut_get_year_month_day(
+/*==================*/
+	ulint*	year,	/*!< out: current year */
+	ulint*	month,	/*!< out: month */
+	ulint*	day);	/*!< out: day */
+#else /* UNIV_HOTBACKUP */
+/*************************************************************//**
+Runs an idle loop on CPU. The argument gives the desired delay
+in microseconds on 100 MHz Pentium + Visual C++.
+@return	dummy value */
+UNIV_INTERN
+ulint
+ut_delay(
+/*=====*/
+	ulint	delay);	/*!< in: delay in microseconds on 100 MHz Pentium */
+#endif /* UNIV_HOTBACKUP */
+/*************************************************************//**
+Prints the contents of a memory buffer in hex and ascii. */
+UNIV_INTERN
+void
+ut_print_buf(
+/*=========*/
+	FILE*		file,	/*!< in: file where to print */
+	const void*	buf,	/*!< in: memory buffer */
+	ulint		len);	/*!< in: length of the buffer */
+
+/**********************************************************************//**
+Outputs a NUL-terminated file name, quoted with apostrophes. */
+UNIV_INTERN
+void
+ut_print_filename(
+/*==============*/
+	FILE*		f,	/*!< in: output stream */
+	const char*	name);	/*!< in: name to print */
+
+#ifndef UNIV_HOTBACKUP
+/* Forward declaration of transaction handle */
+struct trx_t;
+
+/**********************************************************************//**
+Outputs a fixed-length string, quoted as an SQL identifier.
+If the string contains a slash '/', the string will be
+output as two identifiers separated by a period (.),
+as in SQL database_name.identifier. */
+UNIV_INTERN
+void
+ut_print_name(
+/*==========*/
+	FILE*		f,	/*!< in: output stream */
+	const trx_t*	trx,	/*!< in: transaction */
+	ibool		table_id,/*!< in: TRUE=print a table name,
+				FALSE=print other identifier */
+	const char*	name);	/*!< in: name to print */
+
+/**********************************************************************//**
+Outputs a fixed-length string, quoted as an SQL identifier.
+If the string contains a slash '/', the string will be
+output as two identifiers separated by a period (.),
+as in SQL database_name.identifier. */
+UNIV_INTERN
+void
+ut_print_namel(
+/*===========*/
+	FILE*		f,	/*!< in: output stream */
+	const trx_t*	trx,	/*!< in: transaction (NULL=no quotes) */
+	ibool		table_id,/*!< in: TRUE=print a table name,
+				FALSE=print other identifier */
+	const char*	name,	/*!< in: name to print */
+	ulint		namelen);/*!< in: length of name */
+
+/**********************************************************************//**
+Formats a table or index name, quoted as an SQL identifier. If the name
+contains a slash '/', the result will contain two identifiers separated by
+a period (.), as in SQL database_name.identifier.
+@return pointer to 'formatted' */
+UNIV_INTERN
+char*
+ut_format_name(
+/*===========*/
+	const char*	name,		/*!< in: table or index name, must be
+					'\0'-terminated */
+	ibool		is_table,	/*!< in: if TRUE then 'name' is a table
+					name */
+	char*		formatted,	/*!< out: formatted result, will be
+					'\0'-terminated */
+	ulint		formatted_size);/*!< in: no more than this number of
+					bytes will be written to 'formatted' */
+
+/**********************************************************************//**
+Catenate files. */
+UNIV_INTERN
+void
+ut_copy_file(
+/*=========*/
+	FILE*	dest,	/*!< in: output file */
+	FILE*	src);	/*!< in: input file to be appended to output */
+#endif /* !UNIV_HOTBACKUP */
+
+#ifdef __WIN__
+/**********************************************************************//**
+A substitute for vsnprintf(3), formatted output conversion into
+a limited buffer. Note: this function DOES NOT return the number of
+characters that would have been printed if the buffer was unlimited because
+VC's _vsnprintf() returns -1 in this case and we would need to call
+_vscprintf() in addition to estimate that but we would need another copy
+of "ap" for that and VC does not provide va_copy(). */
+UNIV_INTERN
+void
+ut_vsnprintf(
+/*=========*/
+	char*		str,	/*!< out: string */
+	size_t		size,	/*!< in: str size */
+	const char*	fmt,	/*!< in: format */
+	va_list		ap);	/*!< in: format values */
+
+/**********************************************************************//**
+A substitute for snprintf(3), formatted output conversion into
+a limited buffer.
+@return number of characters that would have been printed if the size
+were unlimited, not including the terminating '\0'. */
+UNIV_INTERN
+int
+ut_snprintf(
+/*========*/
+	char*		str,	/*!< out: string */
+	size_t		size,	/*!< in: str size */
+	const char*	fmt,	/*!< in: format */
+	...);			/*!< in: format values */
+#else
+/**********************************************************************//**
+A wrapper for vsnprintf(3), formatted output conversion into
+a limited buffer. Note: the return value of vsnprintf() is discarded by
+the (void) cast, so this macro, like the __WIN__ function of the same
+name declared above, DOES NOT report the number of characters that would
+have been printed if the buffer was unlimited. */
+# define ut_vsnprintf(buf, size, fmt, ap)	\
+	((void) vsnprintf(buf, size, fmt, ap))
+/**********************************************************************//**
+A wrapper for snprintf(3), formatted output conversion into
+a limited buffer. In this (non-__WIN__) branch ut_snprintf is a plain
+alias, so a call evaluates to whatever snprintf() returns. */
+# define ut_snprintf	snprintf
+#endif /* __WIN__ */
+
+/*************************************************************//**
+Convert an error number to a human readable text message. The
+returned string is static and should not be freed or modified.
+@return	string, describing the error */
+UNIV_INTERN
+const char*
+ut_strerr(
+/*======*/
+	dberr_t	num);	/*!< in: error number */
+
+/****************************************************************
+Sort function for ulint arrays. */
+UNIV_INTERN
+void
+ut_ulint_sort(
+/*==========*/
+	ulint*	arr,		/*!< in/out: array to sort */
+	ulint*	aux_arr,	/*!< in/out: aux array to use in sort */
+	ulint	low,		/*!< in: lower bound */
+	ulint	high)		/*!< in: upper bound */
+	__attribute__((nonnull));
+
+#ifndef UNIV_NONINL
+#include "ut0ut.ic"
+#endif
+
+#endif /* !UNIV_INNOCHECKSUM */
+
+#endif
+
diff --git a/storage/innobase/include/ut0ut.ic b/storage/innobase/include/ut0ut.ic
new file mode 100644
index 00000000000..4e0f76e1957
--- /dev/null
+++ b/storage/innobase/include/ut0ut.ic
@@ -0,0 +1,162 @@
+/*****************************************************************************
+
+Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************************//**
+@file include/ut0ut.ic
+Various utilities
+
+Created 5/30/1994 Heikki Tuuri
+*******************************************************************/
+
+/******************************************************//**
+Returns the smaller of two ulints.
+@return	minimum */
+UNIV_INLINE
+ulint
+ut_min(
+/*===*/
+	ulint	 n1,	/*!< in: first number */
+	ulint	 n2)	/*!< in: second number */
+{
+	if (n2 < n1) {
+		return(n2);
+	}
+
+	return(n1);
+}
+
+/******************************************************//**
+Returns the bigger of two ulints.
+@return	maximum */
+UNIV_INLINE
+ulint
+ut_max(
+/*===*/
+	ulint	 n1,	/*!< in: first number */
+	ulint	 n2)	/*!< in: second number */
+{
+	if (n1 > n2) {
+		return(n1);
+	}
+
+	return(n2);
+}
+
+/****************************************************************//**
+Stores in (*a, *b) the smaller of the pairs (a1, b1) and (a2, b2).
+The pairs are ordered by their more significant part first and by
+their less significant part when the more significant parts are equal. */
+UNIV_INLINE
+void
+ut_pair_min(
+/*========*/
+	ulint*	a,	/*!< out: more significant part of minimum */
+	ulint*	b,	/*!< out: less significant part of minimum */
+	ulint	a1,	/*!< in: more significant part of first pair */
+	ulint	b1,	/*!< in: less significant part of first pair */
+	ulint	a2,	/*!< in: more significant part of second pair */
+	ulint	b2)	/*!< in: less significant part of second pair */
+{
+	/* The first pair wins unless the second one is strictly smaller */
+	if (a1 < a2 || (a1 == a2 && b1 <= b2)) {
+		*a = a1;
+		*b = b1;
+		return;
+	}
+
+	*a = a2;
+	*b = b2;
+}
+
+/******************************************************//**
+Three-way comparison of two ulints.
+@return	1 if a > b, 0 if a == b, -1 if a < b */
+UNIV_INLINE
+int
+ut_ulint_cmp(
+/*=========*/
+	ulint	a,	/*!< in: ulint */
+	ulint	b)	/*!< in: ulint */
+{
+	/* Each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
+	return((a > b) - (a < b));
+}
+
+/*******************************************************//**
+Three-way comparison of the pairs (a1, a2) and (b1, b2). The more
+significant parts decide; the less significant parts are looked at
+only when the more significant parts are equal.
+@return	-1 if a < b, 0 if a == b, 1 if a > b */
+UNIV_INLINE
+int
+ut_pair_cmp(
+/*========*/
+	ulint	a1,	/*!< in: more significant part of first pair */
+	ulint	a2,	/*!< in: less significant part of first pair */
+	ulint	b1,	/*!< in: more significant part of second pair */
+	ulint	b2)	/*!< in: less significant part of second pair */
+{
+	if (a1 != b1) {
+		return(a1 < b1 ? -1 : 1);
+	}
+
+	if (a2 != b2) {
+		return(a2 < b2 ? -1 : 1);
+	}
+
+	return(0);
+}
+
+/*************************************************************//**
+Calculates fast the 2-logarithm of a number, rounded upward to an
+integer. The result is 1 plus the number of times n - 1 can be halved
+after the first halving before it reaches zero; note that this gives 1
+(not 0) for n == 1.
+@return	logarithm in the base 2, rounded upward */
+UNIV_INLINE
+ulint
+ut_2_log(
+/*=====*/
+	ulint	n)	/*!< in: number != 0 */
+{
+	ulint	res = 1;
+
+	ut_ad(n > 0);
+
+	/* Count the halvings of n - 1 that still leave a nonzero value */
+	for (n = (n - 1) / 2; n != 0; n /= 2) {
+		res++;
+	}
+
+	return(res);
+}
+
+/*************************************************************//**
+Calculates 2 to power n by shifting a ulint 1 left by n bits. n has to
+be smaller than the number of bits in a ulint.
+@return	2 to power n */
+UNIV_INLINE
+ulint
+ut_2_exp(
+/*=====*/
+	ulint	n)	/*!< in: number */
+{
+	const ulint	one = 1;
+
+	return(one << n);
+}
diff --git a/storage/innobase/include/ut0vec.h b/storage/innobase/include/ut0vec.h
new file mode 100644
index 00000000000..432fb348a09
--- /dev/null
+++ b/storage/innobase/include/ut0vec.h
@@ -0,0 +1,337 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/ut0vec.h
+A vector of pointers to data items
+
+Created 4/6/2006 Osku Salerma
+************************************************************************/
+
+#ifndef IB_VECTOR_H
+#define IB_VECTOR_H
+
+#include "univ.i"
+#include "mem0mem.h"
+
+struct ib_alloc_t;
+struct ib_vector_t;
+
+typedef void* (*ib_mem_alloc_t)(
+					/* out: Pointer to allocated memory */
+	ib_alloc_t*	allocator,	/* in: Pointer to allocator instance */
+	ulint		size);		/* in: Number of bytes to allocate */
+
+typedef void (*ib_mem_free_t)(
+	ib_alloc_t*	allocator,	/* in: Pointer to allocator instance */
+	void*		ptr);		/* in: Memory to free */
+
+typedef void* (*ib_mem_resize_t)(
+					/* out: Pointer to resized memory */
+	ib_alloc_t*	allocator,	/* in: Pointer to allocator */
+	void*		ptr,		/* in: Memory to resize */
+	ulint		old_size,	/* in: Old memory size in bytes */
+	ulint		new_size);	/* in: New size in bytes */
+
+typedef int (*ib_compare_t)(const void*, const void*);
+
+/* An automatically resizing vector datatype with the following properties:
+
+ -All memory allocation is done through an allocator, which is  responsible for
+freeing it when done with the vector.
+*/
+
+/* Shorthand for vectors whose elements are of type void* */
+#define	ib_vector_getp(v, n)	(*(void**) ib_vector_get(v, n))
+#define	ib_vector_getp_const(v, n)	(*(void**) ib_vector_get_const(v, n))
+/* Accessor for the allocator field; v may be any pointer expression */
+#define ib_vector_allocator(v)	((v)->allocator)
+
+/********************************************************************
+Create a new vector with the given initial size. */
+UNIV_INTERN
+ib_vector_t*
+ib_vector_create(
+/*=============*/
+					/* out: vector */
+	ib_alloc_t*	alloc,		/* in: Allocator */
+					/* in: size of the data item */
+	ulint		sizeof_value,
+	ulint		size);		/* in: initial size */
+
+/********************************************************************
+Destroy the vector. Make sure the vector owns the allocator, e.g.,
+the heap in the heap allocator. */
+UNIV_INLINE
+void
+ib_vector_free(
+/*===========*/
+	ib_vector_t*	vec);		/* in/out: vector */
+
+/********************************************************************
+Push a new element to the vector, increasing its size if necessary,
+if elem is not NULL then elem is copied to the vector.*/
+UNIV_INLINE
+void*
+ib_vector_push(
+/*===========*/
+					/* out: pointer to the "new" element */
+	ib_vector_t*	vec,		/* in/out: vector */
+	const void*	elem);		/* in: data element */
+
+/********************************************************************
+Pop the last element from the vector.*/
+UNIV_INLINE
+void*
+ib_vector_pop(
+/*==========*/
+					/* out: pointer to the popped element */
+	ib_vector_t*	vec);		/* in/out: vector */
+
+/*******************************************************************//**
+Remove an element from the vector
+@return pointer to the "removed" element */
+UNIV_INLINE
+void*
+ib_vector_remove(
+/*=============*/
+	ib_vector_t*	vec,	/*!< in: vector */
+	const void*	elem);	/*!< in: value to remove */
+
+/********************************************************************
+Get the number of elements in the vector. */
+UNIV_INLINE
+ulint
+ib_vector_size(
+/*===========*/
+					/* out: number of elements in vector */
+	const ib_vector_t*	vec);	/* in: vector */
+
+/********************************************************************
+Increase the size of the vector. */
+UNIV_INTERN
+void
+ib_vector_resize(
+/*=============*/
+					/* out: void */
+	ib_vector_t*	vec);		/* in/out: vector */
+
+/********************************************************************
+Test whether a vector is empty or not.
+@return TRUE if empty */
+UNIV_INLINE
+ibool
+ib_vector_is_empty(
+/*===============*/
+	const ib_vector_t*	vec);    /*!< in: vector */
+
+/****************************************************************//**
+Get the n'th element.
+@return	n'th element */
+UNIV_INLINE
+void*
+ib_vector_get(
+/*==========*/
+	ib_vector_t*	vec,	/*!< in: vector */
+	ulint		n);	/*!< in: element index to get */
+
+/********************************************************************
+Const version of the get n'th element.
+@return n'th element */
+UNIV_INLINE
+const void*
+ib_vector_get_const(
+/*================*/
+	const ib_vector_t*	vec,	/* in: vector */
+	ulint			n);	/* in: element index to get */
+/****************************************************************//**
+Get last element. The vector must not be empty.
+@return	last element */
+UNIV_INLINE
+void*
+ib_vector_get_last(
+/*===============*/
+	ib_vector_t*	vec);	/*!< in: vector */
+/****************************************************************//**
+Set the n'th element. */
+UNIV_INLINE
+void
+ib_vector_set(
+/*==========*/
+	ib_vector_t*	vec,	/*!< in/out: vector */
+	ulint		n,	/*!< in: element index to set */
+	void*		elem);	/*!< in: data element */
+
+/********************************************************************
+Reset the vector size to 0 elements. */
+UNIV_INLINE
+void
+ib_vector_reset(
+/*============*/
+	ib_vector_t*	vec);		/* in/out: vector */
+
+/********************************************************************
+Get the last element of the vector. */
+UNIV_INLINE
+void*
+ib_vector_last(
+/*===========*/
+					/* out: pointer to last element */
+	ib_vector_t*	vec);		/* in/out: vector */
+
+/********************************************************************
+Get the last element of the vector. */
+UNIV_INLINE
+const void*
+ib_vector_last_const(
+/*=================*/
+					/* out: pointer to last element */
+	const ib_vector_t*	vec);	/* in: vector */
+
+/********************************************************************
+Sort the vector elements. */
+UNIV_INLINE
+void
+ib_vector_sort(
+/*===========*/
+	ib_vector_t*	vec,		/* in/out: vector */
+	ib_compare_t	compare);	/* in: the comparator to use for sort */
+
+/********************************************************************
+The default ib_vector_t heap free. Does nothing. */
+UNIV_INLINE
+void
+ib_heap_free(
+/*=========*/
+	ib_alloc_t*	allocator,	/* in: allocator */
+	void*		ptr);		/* in: memory to free */
+
+/********************************************************************
+The default ib_vector_t heap malloc. Uses mem_heap_alloc(). */
+UNIV_INLINE
+void*
+ib_heap_malloc(
+/*===========*/
+					/* out: pointer to allocated memory */
+	ib_alloc_t*	allocator,	/* in: allocator */
+	ulint		size);		/* in: size in bytes */
+
+/********************************************************************
+The default ib_vector_t heap resize. Since we can't resize the heap
+we have to copy the elements from the old ptr to the new ptr.
+Uses mem_heap_alloc(). */
+UNIV_INLINE
+void*
+ib_heap_resize(
+/*===========*/
+					/* out: pointer to reallocated
+					memory */
+	ib_alloc_t*	allocator,	/* in: allocator */
+	void*		old_ptr,	/* in: pointer to memory */
+	ulint		old_size,	/* in: old size in bytes */
+	ulint		new_size);	/* in: new size in bytes */
+
+/********************************************************************
+Create a heap allocator that uses the passed in heap. */
+UNIV_INLINE
+ib_alloc_t*
+ib_heap_allocator_create(
+/*=====================*/
+					/* out: heap allocator instance */
+	mem_heap_t*	heap);		/* in: heap to use */
+
+/********************************************************************
+Free a heap allocator. */
+UNIV_INLINE
+void
+ib_heap_allocator_free(
+/*===================*/
+	ib_alloc_t*	ib_ut_alloc);	/* in: alloc instance to free */
+
+/********************************************************************
+Wrapper for ut_free(). */
+UNIV_INLINE
+void
+ib_ut_free(
+/*=======*/
+	ib_alloc_t*	allocator,	/* in: allocator */
+	void*		ptr);		/* in: memory to free */
+
+/********************************************************************
+Wrapper for ut_malloc(). */
+UNIV_INLINE
+void*
+ib_ut_malloc(
+/*=========*/
+					/* out: pointer to allocated memory */
+	ib_alloc_t*	allocator,	/* in: allocator */
+	ulint		size);		/* in: size in bytes */
+
+/********************************************************************
+Wrapper for ut_realloc(). */
+UNIV_INLINE
+void*
+ib_ut_resize(
+/*=========*/
+					/* out: pointer to reallocated
+					memory */
+	ib_alloc_t*	allocator,	/* in: allocator */
+	void*		old_ptr,	/* in: pointer to memory */
+	ulint		old_size,	/* in: old size in bytes */
+	ulint		new_size);	/* in: new size in bytes */
+
+/********************************************************************
+Create a ut allocator. */
+UNIV_INLINE
+ib_alloc_t*
+ib_ut_allocator_create(void);
+/*=========================*/
+
+/********************************************************************
+Free a ut allocator. */
+UNIV_INLINE
+void
+ib_ut_allocator_free(
+/*=================*/
+	ib_alloc_t*	ib_ut_alloc);	/* in: alloc instance to free */
+
+/* Allocator used by ib_vector_t: three callbacks plus a context pointer. */
+struct ib_alloc_t {
+	ib_mem_alloc_t	mem_malloc;	/* Callback for allocating memory */
+	ib_mem_free_t	mem_release;	/* Callback for freeing memory */
+	ib_mem_resize_t	mem_resize;	/* Callback for resizing memory */
+	void*		arg;		/* NULL for the ut allocator, else
+					the heap of the heap allocator */
+};
+
+/* An automatically resizing vector of fixed-size data items. */
+struct ib_vector_t {
+	ib_alloc_t*	allocator;	/* Allocator, because one size
+					doesn't fit all */
+	void*		data;		/* data elements, stored back to back */
+	ulint		used;		/* number of elements currently used */
+	ulint		total;		/* number of elements allocated */
+					/* Size of a data item in bytes */
+	ulint		sizeof_value;
+};
+
+#ifndef UNIV_NONINL
+#include "ut0vec.ic"
+#endif
+
+#endif /* IB_VECTOR_H */
diff --git a/storage/innobase/include/ut0vec.ic b/storage/innobase/include/ut0vec.ic
new file mode 100644
index 00000000000..f41a85e1d1d
--- /dev/null
+++ b/storage/innobase/include/ut0vec.ic
@@ -0,0 +1,425 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/ut0vec.ic
+A vector of pointers to data items
+
+Created 4/6/2006 Osku Salerma
+************************************************************************/
+
+#define	IB_VEC_OFFSET(v, i)	((v)->sizeof_value * (i)) /* byte offset of item i in v */
+
+/********************************************************************
+Allocation callback of the heap allocator: takes size bytes from the
+memory heap stored in allocator->arg, using mem_heap_alloc(). */
+UNIV_INLINE
+void*
+ib_heap_malloc(
+/*===========*/
+					/* out: pointer to allocated memory */
+	ib_alloc_t*	allocator,	/* in: heap allocator */
+	ulint		size)		/* in: size in bytes */
+{
+	return(mem_heap_alloc((mem_heap_t*) allocator->arg, size));
+}
+
+/********************************************************************
+The default ib_vector_t heap free. Deliberately does nothing. */
+UNIV_INLINE
+void
+ib_heap_free(
+/*=========*/
+	ib_alloc_t*	allocator UNIV_UNUSED,	/* in: allocator (unused) */
+	void*		ptr UNIV_UNUSED)	/* in: memory to free (unused) */
+{
+	/* We can't free individual elements. */
+}
+
+/********************************************************************
+The default ib_vector_t heap resize. A heap block cannot be resized in
+place, so a new block is allocated with mem_heap_alloc() and the old
+contents are copied over; the old block is not released here. */
+UNIV_INLINE
+void*
+ib_heap_resize(
+/*===========*/
+	ib_alloc_t*	allocator,	/* in: allocator */
+	void*		old_ptr,	/* in: pointer to memory */
+	ulint		old_size,	/* in: old size in bytes */
+	ulint		new_size)	/* in: new size in bytes */
+{
+	mem_heap_t*	heap = (mem_heap_t*) allocator->arg;
+	void*		new_ptr = mem_heap_alloc(heap, new_size);
+
+	/* Never copy more than the new block can hold (shrinking). */
+	memcpy(new_ptr, old_ptr, old_size < new_size ? old_size : new_size);
+
+	return(new_ptr);
+}
+
+/********************************************************************
+Create a heap allocator that is itself allocated from the passed in heap. */
+UNIV_INLINE
+ib_alloc_t*
+ib_heap_allocator_create(
+/*=====================*/
+	mem_heap_t*	heap)		/* in: heap to use */
+{
+	void*		mem = mem_heap_alloc(heap, sizeof(ib_alloc_t));
+	ib_alloc_t*	alloc = (ib_alloc_t*) mem;
+
+	/* Wire up the heap based callbacks; arg carries the heap. */
+	alloc->mem_malloc = ib_heap_malloc;
+	alloc->mem_resize = ib_heap_resize;
+	alloc->mem_release = ib_heap_free;
+	alloc->arg = heap;
+
+	return(alloc);
+}
+
+/********************************************************************
+Free a heap allocator by freeing the heap stored in its arg field. */
+UNIV_INLINE
+void
+ib_heap_allocator_free(
+/*===================*/
+	ib_alloc_t*	ib_ut_alloc)	/* in: alloc instance to free */
+{
+	mem_heap_free((mem_heap_t*) ib_ut_alloc->arg);
+}
+
+/********************************************************************
+Allocation callback of the ut allocator: forwards to ut_malloc(). */
+UNIV_INLINE
+void*
+ib_ut_malloc(
+/*=========*/
+	ib_alloc_t*	allocator UNIV_UNUSED,	/* in: allocator (unused) */
+	ulint		size)			/* in: size in bytes */
+{
+	return(ut_malloc(size));
+}
+
+/********************************************************************
+Release callback of the ut allocator: forwards to ut_free(). */
+UNIV_INLINE
+void
+ib_ut_free(
+/*=======*/
+	ib_alloc_t*	allocator UNIV_UNUSED,	/* in: allocator (unused) */
+	void*		ptr)			/* in: memory to free */
+{
+	ut_free(ptr);
+}
+
+/********************************************************************
+Resize callback of the ut allocator: forwards to ut_realloc(). */
+UNIV_INLINE
+void*
+ib_ut_resize(
+/*=========*/
+	ib_alloc_t*	allocator UNIV_UNUSED,	/* in: allocator (unused) */
+	void*		old_ptr,	/* in: pointer to memory */
+	ulint		old_size UNIV_UNUSED,/* in: old size in bytes (unused) */
+	ulint		new_size)	/* in: new size in bytes */
+{
+	return(ut_realloc(old_ptr, new_size));
+}
+
+/********************************************************************
+Create an allocator backed by ut_malloc(), ut_realloc() and ut_free(). */
+UNIV_INLINE
+ib_alloc_t*
+ib_ut_allocator_create(void)
+/*========================*/
+{
+	ib_alloc_t*	alloc = (ib_alloc_t*) ut_malloc(sizeof(ib_alloc_t));
+
+	/* No context is needed here: arg stays NULL, only the heap
+	allocator uses that field. */
+	alloc->mem_malloc = ib_ut_malloc;
+	alloc->mem_resize = ib_ut_resize;
+	alloc->mem_release = ib_ut_free;
+	alloc->arg = NULL;
+
+	return(alloc);
+}
+
+/********************************************************************
+Free a ut allocator: hands the allocator object itself to ut_free(). */
+UNIV_INLINE
+void
+ib_ut_allocator_free(
+/*=================*/
+	ib_alloc_t*	ib_ut_alloc)	/* in: alloc instance to free */
+{
+	ut_free(ib_ut_alloc);
+}
+
+/********************************************************************
+Get the number of elements in use (the used field, not the capacity). */
+UNIV_INLINE
+ulint
+ib_vector_size(
+/*===========*/
+					/* out: number of elements in vector */
+	const ib_vector_t*	vec)	/* in: vector */
+{
+	return(vec->used);
+}
+
+/****************************************************************//**
+Get a pointer to the n'th element; n must be below the used count. */
+UNIV_INLINE
+void*
+ib_vector_get(
+/*==========*/
+	ib_vector_t*	vec,	/*!< in: vector */
+	ulint		n)	/*!< in: element index to get */
+{
+	ut_a(n < vec->used);
+
+	return(&((byte*) vec->data)[vec->sizeof_value * n]);
+}
+
+/********************************************************************
+Const version of get: n must be below the used count.
+@return pointer to the n'th element */
+UNIV_INLINE
+const void*
+ib_vector_get_const(
+/*================*/
+	const ib_vector_t*	vec,	/* in: vector */
+	ulint			n)	/* in: element index to get */
+{
+	ut_a(n < vec->used);
+
+	return(&((const byte*) vec->data)[vec->sizeof_value * n]);
+}
+/****************************************************************//**
+Get the element at index used - 1. The vector must not be empty.
+@return	pointer to the last element */
+UNIV_INLINE
+void*
+ib_vector_get_last(
+/*===============*/
+	ib_vector_t*	vec)	/*!< in: vector */
+{
+	ut_a(vec->used > 0);
+
+	return(ib_vector_get(vec, ib_vector_size(vec) - 1));
+}
+
+/****************************************************************//**
+Overwrite the n'th element; n must be below the used count. */
+UNIV_INLINE
+void
+ib_vector_set(
+/*==========*/
+	ib_vector_t*	vec,	/*!< in/out: vector */
+	ulint		n,	/*!< in: element index to set */
+	void*		elem)	/*!< in: data element to copy from */
+{
+	ut_a(n < vec->used);
+
+	/* The slot starts n items into the data array; exactly one
+	item, i.e. sizeof_value bytes, is copied from elem. */
+	memcpy((byte*) vec->data + vec->sizeof_value * n,
+	       elem, vec->sizeof_value);
+}
+
+/********************************************************************
+Reset the vector size to 0 elements; data and total are left as is. */
+UNIV_INLINE
+void
+ib_vector_reset(
+/*============*/
+					/* out: void */
+	ib_vector_t*	vec)		/* in/out: vector */
+{
+	vec->used = 0;
+}
+
+/********************************************************************
+Get the last element of the vector. The vector must not be empty. */
+UNIV_INLINE
+void*
+ib_vector_last(
+/*===========*/
+					/* out: pointer to last element */
+	ib_vector_t*	vec)		/* in: vector */
+{
+	ut_a(ib_vector_size(vec) > 0);
+
+	return(ib_vector_get(vec, vec->used - 1));
+}
+
+/********************************************************************
+Const version of get last element. The vector must not be empty. */
+UNIV_INLINE
+const void*
+ib_vector_last_const(
+/*=================*/
+					/* out: pointer to last element */
+	const ib_vector_t*	vec)	/* in: vector */
+{
+	ut_a(ib_vector_size(vec) > 0);
+
+	return(ib_vector_get_const(vec, vec->used - 1));
+}
+
+/****************************************************************//**
+Remove the last element from the vector. The vector must not be empty.
+@return	pointer to the element that was last */
+UNIV_INLINE
+void*
+ib_vector_pop(
+/*==========*/
+				/* out: pointer to element */
+	ib_vector_t*	vec)	/* in/out: vector */
+{
+	ut_a(vec->used > 0);
+
+	/* Shrink first: the new used count is then the index of the
+	slot that just fell off the end. Its bytes are not touched
+	here, only the count changes. */
+	vec->used--;
+
+	return((byte*) vec->data + vec->sizeof_value * vec->used);
+}
+
+/********************************************************************
+Append an element to the vector, growing it first when it is full. If
+elem != NULL the data is copied from elem into the new slot.*/
+UNIV_INLINE
+void*
+ib_vector_push(
+/*===========*/
+				/* out: pointer to the "new" element */
+	ib_vector_t*	vec,	/* in/out: vector */
+	const void*	elem)	/* in: element to add (can be NULL) */
+{
+	byte*		slot;
+
+	/* Grow the storage when every allocated slot is in use. */
+	if (vec->total <= vec->used) {
+		ib_vector_resize(vec);
+	}
+
+	slot = (byte*) vec->data + vec->sizeof_value * vec->used;
+	vec->used++;
+
+#ifdef UNIV_DEBUG
+	memset(slot, 0, vec->sizeof_value);
+#endif
+
+	if (elem != NULL) {
+		memcpy(slot, elem, vec->sizeof_value);
+	}
+
+	return(slot);
+}
+
+/*******************************************************************//**
+Remove the first element whose leading pointer value equals elem.
+@return pointer to the slot it occupied, or NULL if not found */
+UNIV_INLINE
+void*
+ib_vector_remove(
+/*=============*/
+	ib_vector_t*	vec,	/*!< in: vector */
+	const void*	elem)	/*!< in: value to remove */
+{
+	ulint		i;
+
+	for (i = 0; i < vec->used; i++) {
+		void*	current = ib_vector_get(vec, i);
+
+		if (*(void**) current != elem) {
+			continue;
+		}
+
+		if (i == vec->used - 1) {
+			return(ib_vector_pop(vec));
+		}
+
+		/* After the shift, current holds the former successor. */
+		memmove(current, ib_vector_get(vec, i + 1),
+			vec->sizeof_value * (vec->used - i - 1));
+		--vec->used;
+
+		return(current);
+	}
+
+	return(NULL);
+}
+
+/********************************************************************
+Sort the used elements of the vector in place with qsort(). */
+UNIV_INLINE
+void
+ib_vector_sort(
+/*===========*/
+				/* out: void */
+	ib_vector_t*	vec,	/* in/out: vector */
+	ib_compare_t	compare)/* in: the comparator to use for sort */
+{
+	qsort(vec->data, vec->used, vec->sizeof_value, compare);
+}
+
+/********************************************************************
+Destroy the vector. Make sure the vector owns the allocator, e.g.,
+the heap in the heap allocator. */
+UNIV_INLINE
+void
+ib_vector_free(
+/*===========*/
+	ib_vector_t*	vec)		/* in, own: vector */
+{
+	ib_alloc_t*	allocator = vec->allocator;
+
+	/* Only two kinds of allocators are supported: heap based and
+	ut_malloc() based. Only the heap allocator uses the arg field,
+	and when its heap is freed all the elements are freed
+	too. */
+	if (allocator->arg != NULL) {
+		mem_heap_free((mem_heap_t*) allocator->arg);
+		return;
+	}
+
+	/* With the ut allocator, the elements, the vector instance
+	and the allocator need to be freed separately, in that
+	order. */
+	allocator->mem_release(allocator, vec->data);
+	allocator->mem_release(allocator, vec);
+
+	ib_ut_allocator_free(allocator);
+}
+
+/********************************************************************
+Check whether the vector holds no used elements.
+@return TRUE if empty */
+UNIV_INLINE
+ibool
+ib_vector_is_empty(
+/*===============*/
+	const ib_vector_t*	vec)	/*!< in: vector */
+{
+	return(vec->used == 0);
+}
diff --git a/storage/innobase/include/ut0wqueue.h b/storage/innobase/include/ut0wqueue.h
new file mode 100644
index 00000000000..33385ddf2d4
--- /dev/null
+++ b/storage/innobase/include/ut0wqueue.h
@@ -0,0 +1,105 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/ut0wqueue.h
+A work queue
+
+Created 4/26/2006 Osku Salerma
+************************************************************************/
+
+/*******************************************************************//**
+A Work queue. Threads can add work items to the queue and other threads can
+wait for work items to be available and take them off the queue for
+processing.
+************************************************************************/
+
+#ifndef IB_WORK_QUEUE_H
+#define IB_WORK_QUEUE_H
+
+#include "ut0list.h"
+#include "mem0mem.h"
+#include "os0sync.h"
+#include "sync0types.h"
+
+struct ib_wqueue_t;
+
+/****************************************************************//**
+Create a new work queue.
+@return	work queue */
+UNIV_INTERN
+ib_wqueue_t*
+ib_wqueue_create(void);
+/*===================*/
+
+/****************************************************************//**
+Free a work queue. */
+UNIV_INTERN
+void
+ib_wqueue_free(
+/*===========*/
+	ib_wqueue_t*	wq);	/*!< in: work queue */
+
+/****************************************************************//**
+Add a work item to the queue. */
+UNIV_INTERN
+void
+ib_wqueue_add(
+/*==========*/
+	ib_wqueue_t*	wq,	/*!< in: work queue */
+	void*		item,	/*!< in: work item */
+	mem_heap_t*	heap);	/*!< in: memory heap to use for allocating the
+				list node */
+
+/********************************************************************
+Check if queue is empty. */
+
+ibool
+ib_wqueue_is_empty(
+/*===============*/
+					/* out: TRUE if queue empty
+					else FALSE */
+	const ib_wqueue_t*      wq);    /* in: work queue */
+
+/****************************************************************//**
+Wait for a work item to appear in the queue.
+@return	work item */
+UNIV_INTERN
+void*
+ib_wqueue_wait(
+/*===========*/
+	ib_wqueue_t*	wq);	/*!< in: work queue */
+
+/********************************************************************
+Wait for a work item to appear in the queue for specified time. */
+
+void*
+ib_wqueue_timedwait(
+/*================*/
+					/* out: work item or NULL on timeout*/
+	ib_wqueue_t*	wq,		/* in: work queue */
+	ib_time_t	wait_in_usecs); /* in: wait time in micro seconds */
+
+/* Work queue: a mutex protected list of work items plus an event. */
+struct ib_wqueue_t {
+	ib_mutex_t		mutex;	/*!< mutex protecting everything */
+	ib_list_t*	items;	/*!< work item list */
+	os_event_t	event;	/*!< event we use to signal additions to list */
+};
+
+#endif
author	Sergei Golubchik <vuvova@gmail.com>	2015-05-04 19:17:21 +0200
committer	Sergei Golubchik <vuvova@gmail.com>	2015-05-04 19:17:21 +0200
commit	6d06fbbd1dc25b3c12568f9038060dfdb69f9683 (patch)
tree	21e27f3fddc89f9dda6b337091464ba10c490123 /storage/innobase/include
parent	1645930d0bd02f79df3ebff412b90acdc15bd9a0 (diff)
download	mariadb-git-6d06fbbd1dc25b3c12568f9038060dfdb69f9683.tar.gz