path: root/innobase/include
author     unknown <monty@donna.mysql.com>  2001-02-17 14:19:19 +0200
committer  unknown <monty@donna.mysql.com>  2001-02-17 14:19:19 +0200
commit     2662b59306ef0cd495fa6e2edf7129e58a11393a (patch)
tree       bfe39951a73e906579ab819bf5198ad8f3a64a36 /innobase/include
parent     66de55a56bdcf2f7a9c0c4f8e19b3e761475e202 (diff)
download   mariadb-git-2662b59306ef0cd495fa6e2edf7129e58a11393a.tar.gz
Added Innobase to source distribution
Docs/manual.texi:            Added Innobase documentation
configure.in:                Incremented version
include/my_base.h:           Added option for Innobase
myisam/mi_check.c:           cleanup
mysql-test/t/bdb.test:       cleanup
mysql-test/t/innobase.test:  Extended with new tests from bdb.test
mysql-test/t/merge.test:     Added test of SHOW CREATE
mysys/my_init.c:             Fix for UNIXWARE 7
scripts/mysql_install_db.sh: Always write how to start mysqld
scripts/safe_mysqld.sh:      Fixed typo
sql/ha_innobase.cc:          Update to new version
sql/ha_innobase.h:           Update to new version
sql/handler.h:               Added 'update_table_comment()' and 'append_create_info()'
sql/sql_delete.cc:           Fixes for Innobase
sql/sql_select.cc:           Fixes for Innobase
sql/sql_show.cc:             Append create information (for MERGE tables)
sql/sql_update.cc:           Fixes for Innobase
Diffstat (limited to 'innobase/include')
-rw-r--r--  innobase/include/Makefile.i  5
-rw-r--r--  innobase/include/btr0btr.h  391
-rw-r--r--  innobase/include/btr0btr.ic  223
-rw-r--r--  innobase/include/btr0cur.h  519
-rw-r--r--  innobase/include/btr0cur.ic  172
-rw-r--r--  innobase/include/btr0pcur.h  486
-rw-r--r--  innobase/include/btr0pcur.ic  598
-rw-r--r--  innobase/include/btr0sea.h  269
-rw-r--r--  innobase/include/btr0sea.ic  65
-rw-r--r--  innobase/include/btr0types.h  21
-rw-r--r--  innobase/include/buf0buf.h  834
-rw-r--r--  innobase/include/buf0buf.ic  641
-rw-r--r--  innobase/include/buf0flu.h  110
-rw-r--r--  innobase/include/buf0flu.ic  100
-rw-r--r--  innobase/include/buf0lru.h  117
-rw-r--r--  innobase/include/buf0lru.ic  8
-rw-r--r--  innobase/include/buf0rea.h  98
-rw-r--r--  innobase/include/buf0types.h  20
-rw-r--r--  innobase/include/com0com.h  125
-rw-r--r--  innobase/include/com0com.ic  7
-rw-r--r--  innobase/include/com0shm.h  103
-rw-r--r--  innobase/include/com0shm.ic  7
-rw-r--r--  innobase/include/data0data.h  430
-rw-r--r--  innobase/include/data0data.ic  491
-rw-r--r--  innobase/include/data0type.h  214
-rw-r--r--  innobase/include/data0type.ic  248
-rw-r--r--  innobase/include/data0types.h  19
-rw-r--r--  innobase/include/db0err.h  44
-rw-r--r--  innobase/include/dict0boot.h  132
-rw-r--r--  innobase/include/dict0boot.ic  124
-rw-r--r--  innobase/include/dict0crea.h  140
-rw-r--r--  innobase/include/dict0crea.ic  8
-rw-r--r--  innobase/include/dict0dict.h  677
-rw-r--r--  innobase/include/dict0dict.ic  696
-rw-r--r--  innobase/include/dict0load.h  49
-rw-r--r--  innobase/include/dict0load.ic  9
-rw-r--r--  innobase/include/dict0mem.h  335
-rw-r--r--  innobase/include/dict0mem.ic  9
-rw-r--r--  innobase/include/dict0types.h  28
-rw-r--r--  innobase/include/dyn0dyn.h  172
-rw-r--r--  innobase/include/dyn0dyn.ic  345
-rw-r--r--  innobase/include/eval0eval.h  97
-rw-r--r--  innobase/include/eval0eval.ic  236
-rw-r--r--  innobase/include/eval0proc.h  79
-rw-r--r--  innobase/include/eval0proc.ic  71
-rw-r--r--  innobase/include/fil0fil.h  357
-rw-r--r--  innobase/include/fsp0fsp.h  331
-rw-r--r--  innobase/include/fsp0fsp.ic  24
-rw-r--r--  innobase/include/fut0fut.h  36
-rw-r--r--  innobase/include/fut0fut.ic  36
-rw-r--r--  innobase/include/fut0lst.h  198
-rw-r--r--  innobase/include/fut0lst.ic  147
-rw-r--r--  innobase/include/ha0ha.h  137
-rw-r--r--  innobase/include/ha0ha.ic  280
-rw-r--r--  innobase/include/hash0hash.h  345
-rw-r--r--  innobase/include/hash0hash.ic  131
-rw-r--r--  innobase/include/ib_odbc.h  149
-rw-r--r--  innobase/include/ibuf0ibuf.h  268
-rw-r--r--  innobase/include/ibuf0ibuf.ic  226
-rw-r--r--  innobase/include/ibuf0types.h  15
-rw-r--r--  innobase/include/lock0lock.h  538
-rw-r--r--  innobase/include/lock0lock.ic  80
-rw-r--r--  innobase/include/lock0types.h  15
-rw-r--r--  innobase/include/log0log.h  752
-rw-r--r--  innobase/include/log0log.ic  378
-rw-r--r--  innobase/include/log0recv.h  284
-rw-r--r--  innobase/include/log0recv.ic  35
-rw-r--r--  innobase/include/mach0data.h  332
-rw-r--r--  innobase/include/mach0data.ic  727
-rw-r--r--  innobase/include/makefilewin.i  34
-rw-r--r--  innobase/include/mem0dbg.h  117
-rw-r--r--  innobase/include/mem0dbg.ic  91
-rw-r--r--  innobase/include/mem0mem.h  350
-rw-r--r--  innobase/include/mem0mem.ic  597
-rw-r--r--  innobase/include/mem0pool.h  83
-rw-r--r--  innobase/include/mem0pool.ic  7
-rw-r--r--  innobase/include/mtr0log.h  178
-rw-r--r--  innobase/include/mtr0log.ic  187
-rw-r--r--  innobase/include/mtr0mtr.h  343
-rw-r--r--  innobase/include/mtr0mtr.ic  261
-rw-r--r--  innobase/include/mtr0types.h  14
-rw-r--r--  innobase/include/odbc0odbc.h  20
-rw-r--r--  innobase/include/os0file.h  353
-rw-r--r--  innobase/include/os0proc.h  71
-rw-r--r--  innobase/include/os0proc.ic  10
-rw-r--r--  innobase/include/os0shm.h  66
-rw-r--r--  innobase/include/os0shm.ic  10
-rw-r--r--  innobase/include/os0sync.h  198
-rw-r--r--  innobase/include/os0sync.ic  56
-rw-r--r--  innobase/include/os0thread.h  121
-rw-r--r--  innobase/include/os0thread.ic  8
-rw-r--r--  innobase/include/page0cur.h  263
-rw-r--r--  innobase/include/page0cur.ic  221
-rw-r--r--  innobase/include/page0page.h  697
-rw-r--r--  innobase/include/page0page.ic  772
-rw-r--r--  innobase/include/page0types.h  20
-rw-r--r--  innobase/include/pars0grm.h  90
-rw-r--r--  innobase/include/pars0opt.h  58
-rw-r--r--  innobase/include/pars0opt.ic  7
-rw-r--r--  innobase/include/pars0pars.h  566
-rw-r--r--  innobase/include/pars0pars.ic  7
-rw-r--r--  innobase/include/pars0sym.h  191
-rw-r--r--  innobase/include/pars0sym.ic  7
-rw-r--r--  innobase/include/pars0types.h  29
-rw-r--r--  innobase/include/que0que.h  495
-rw-r--r--  innobase/include/que0que.ic  304
-rw-r--r--  innobase/include/que0types.h  42
-rw-r--r--  innobase/include/read0read.h  92
-rw-r--r--  innobase/include/read0read.ic  85
-rw-r--r--  innobase/include/read0types.h  14
-rw-r--r--  innobase/include/rem0cmp.h  130
-rw-r--r--  innobase/include/rem0cmp.ic  84
-rw-r--r--  innobase/include/rem0rec.h  357
-rw-r--r--  innobase/include/rem0rec.ic  959
-rw-r--r--  innobase/include/rem0types.h  16
-rw-r--r--  innobase/include/row0ins.h  142
-rw-r--r--  innobase/include/row0ins.ic  9
-rw-r--r--  innobase/include/row0mysql.h  359
-rw-r--r--  innobase/include/row0mysql.ic  97
-rw-r--r--  innobase/include/row0purge.h  80
-rw-r--r--  innobase/include/row0purge.ic  8
-rw-r--r--  innobase/include/row0row.h  266
-rw-r--r--  innobase/include/row0row.ic  165
-rw-r--r--  innobase/include/row0sel.h  330
-rw-r--r--  innobase/include/row0sel.ic  91
-rw-r--r--  innobase/include/row0types.h  37
-rw-r--r--  innobase/include/row0uins.h  37
-rw-r--r--  innobase/include/row0uins.ic  8
-rw-r--r--  innobase/include/row0umod.h  35
-rw-r--r--  innobase/include/row0umod.ic  7
-rw-r--r--  innobase/include/row0undo.h  117
-rw-r--r--  innobase/include/row0undo.ic  7
-rw-r--r--  innobase/include/row0upd.h  363
-rw-r--r--  innobase/include/row0upd.ic  105
-rw-r--r--  innobase/include/row0vers.h  95
-rw-r--r--  innobase/include/row0vers.ic  83
-rw-r--r--  innobase/include/srv0que.h  53
-rw-r--r--  innobase/include/srv0srv.h  237
-rw-r--r--  innobase/include/srv0srv.ic  7
-rw-r--r--  innobase/include/srv0start.h  31
-rw-r--r--  innobase/include/sync0arr.h  114
-rw-r--r--  innobase/include/sync0arr.ic  10
-rw-r--r--  innobase/include/sync0ipm.h  113
-rw-r--r--  innobase/include/sync0ipm.ic  182
-rw-r--r--  innobase/include/sync0rw.h  493
-rw-r--r--  innobase/include/sync0rw.ic  510
-rw-r--r--  innobase/include/sync0sync.h  497
-rw-r--r--  innobase/include/sync0sync.ic  226
-rw-r--r--  innobase/include/sync0types.h  15
-rw-r--r--  innobase/include/thr0loc.h  67
-rw-r--r--  innobase/include/thr0loc.ic  7
-rw-r--r--  innobase/include/trx0purge.h  166
-rw-r--r--  innobase/include/trx0purge.ic  26
-rw-r--r--  innobase/include/trx0rec.h  284
-rw-r--r--  innobase/include/trx0rec.ic  69
-rw-r--r--  innobase/include/trx0roll.h  216
-rw-r--r--  innobase/include/trx0roll.ic  23
-rw-r--r--  innobase/include/trx0rseg.h  193
-rw-r--r--  innobase/include/trx0rseg.ic  112
-rw-r--r--  innobase/include/trx0sys.h  270
-rw-r--r--  innobase/include/trx0sys.ic  352
-rw-r--r--  innobase/include/trx0trx.h  412
-rw-r--r--  innobase/include/trx0trx.ic  23
-rw-r--r--  innobase/include/trx0types.h  43
-rw-r--r--  innobase/include/trx0undo.h  473
-rw-r--r--  innobase/include/trx0undo.ic  319
-rw-r--r--  innobase/include/univ.i  166
-rw-r--r--  innobase/include/univold.i  164
-rw-r--r--  innobase/include/univoldmysql.i  181
-rw-r--r--  innobase/include/usr0sess.h  318
-rw-r--r--  innobase/include/usr0sess.ic  31
-rw-r--r--  innobase/include/usr0types.h  16
-rw-r--r--  innobase/include/ut0byte.h  229
-rw-r--r--  innobase/include/ut0byte.ic  360
-rw-r--r--  innobase/include/ut0dbg.h  78
-rw-r--r--  innobase/include/ut0lst.h  215
-rw-r--r--  innobase/include/ut0mem.h  64
-rw-r--r--  innobase/include/ut0mem.ic  57
-rw-r--r--  innobase/include/ut0rnd.h  121
-rw-r--r--  innobase/include/ut0rnd.ic  222
-rw-r--r--  innobase/include/ut0sort.h  91
-rw-r--r--  innobase/include/ut0ut.h  174
-rw-r--r--  innobase/include/ut0ut.ic  196
183 files changed, 34741 insertions, 0 deletions
diff --git a/innobase/include/Makefile.i b/innobase/include/Makefile.i
new file mode 100644
index 00000000000..2bc51147347
--- /dev/null
+++ b/innobase/include/Makefile.i
@@ -0,0 +1,5 @@
+# Makefile included in Makefile.am in every subdirectory
+
+libsdir = ../libs
+
+INCLUDES = -I../../include -I../include
diff --git a/innobase/include/btr0btr.h b/innobase/include/btr0btr.h
new file mode 100644
index 00000000000..d2ac9952695
--- /dev/null
+++ b/innobase/include/btr0btr.h
@@ -0,0 +1,391 @@
+/******************************************************
+The B-tree
+
+(c) 1994-1996 Innobase Oy
+
+Created 6/2/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef btr0btr_h
+#define btr0btr_h
+
+#include "univ.i"
+
+#include "dict0dict.h"
+#include "data0data.h"
+#include "page0cur.h"
+#include "rem0rec.h"
+#include "mtr0mtr.h"
+#include "btr0types.h"
+
+/* Maximum record size which can be stored on a page, without using the
+special big record storage structure */
+
+#define BTR_PAGE_MAX_REC_SIZE (UNIV_PAGE_SIZE / 2 - 200)
+
+/* Maximum key size in a B-tree: the records on non-leaf levels must be
+shorter than this */
+
+#define BTR_PAGE_MAX_KEY_SIZE 1024
+
+/* If data in page drops below this limit, we try to compress it.
+NOTE! The value has to be > 2 * BTR_PAGE_MAX_KEY_SIZE */
+
+#define BTR_COMPRESS_LIMIT (UNIV_PAGE_SIZE / 4 + 1)
+
+/* Latching modes for the search function (in btr0cur.*) */
+#define BTR_SEARCH_LEAF RW_S_LATCH
+#define BTR_MODIFY_LEAF RW_X_LATCH
+#define BTR_NO_LATCHES RW_NO_LATCH
+#define BTR_MODIFY_TREE 33
+#define BTR_CONT_MODIFY_TREE 34
+#define BTR_SEARCH_PREV 35
+#define BTR_MODIFY_PREV 36
+
+/* If this is ORed to the latch mode, it means that the search tuple will be
+inserted to the index, at the searched position */
+#define BTR_INSERT 512
+
+/* This flag ORed to latch mode says that we do the search in query
+optimization */
+#define BTR_ESTIMATE 1024
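/* Editorial note, not part of the patch: BTR_INSERT and BTR_ESTIMATE are
meant to be ORed into the latch mode that is passed down to the search
functions declared below and in btr0cur.h. For illustration only, a caller
searching for an insert position might pass

	BTR_MODIFY_LEAF | BTR_INSERT

while the range-estimation code could pass BTR_SEARCH_LEAF | BTR_ESTIMATE. */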
+/******************************************************************
+Gets a buffer page and declares its latching order level. */
+UNIV_INLINE
+page_t*
+btr_page_get(
+/*=========*/
+ ulint space, /* in: space id */
+ ulint page_no, /* in: page number */
+ ulint mode, /* in: latch mode */
+ mtr_t* mtr); /* in: mtr */
+/******************************************************************
+Gets the index id field of a page. */
+UNIV_INLINE
+dulint
+btr_page_get_index_id(
+/*==================*/
+ /* out: index id */
+ page_t* page); /* in: index page */
+/************************************************************
+Gets the node level field in an index page. */
+UNIV_INLINE
+ulint
+btr_page_get_level_low(
+/*===================*/
+ /* out: level, leaf level == 0 */
+ page_t* page); /* in: index page */
+/************************************************************
+Gets the node level field in an index page. */
+UNIV_INLINE
+ulint
+btr_page_get_level(
+/*===============*/
+ /* out: level, leaf level == 0 */
+ page_t* page, /* in: index page */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************
+Gets the next index page number. */
+UNIV_INLINE
+ulint
+btr_page_get_next(
+/*==============*/
+ /* out: next page number */
+ page_t* page, /* in: index page */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************
+Gets the previous index page number. */
+UNIV_INLINE
+ulint
+btr_page_get_prev(
+/*==============*/
+ /* out: prev page number */
+ page_t* page, /* in: index page */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/*****************************************************************
+Gets pointer to the previous user record in the tree. It is assumed
+that the caller has appropriate latches on the page and its neighbor. */
+
+rec_t*
+btr_get_prev_user_rec(
+/*==================*/
+ /* out: previous user record, NULL if there is none */
+ rec_t* rec, /* in: record on leaf level */
+ mtr_t* mtr); /* in: mtr holding a latch on the page, and if
+ needed, also to the previous page */
+/*****************************************************************
+Gets pointer to the next user record in the tree. It is assumed
+that the caller has appropriate latches on the page and its neighbor. */
+
+rec_t*
+btr_get_next_user_rec(
+/*==================*/
+ /* out: next user record, NULL if there is none */
+ rec_t* rec, /* in: record on leaf level */
+ mtr_t* mtr); /* in: mtr holding a latch on the page, and if
+ needed, also to the next page */
+/******************************************************************
+Releases the latch on a leaf page and buffer-unfixes it. */
+UNIV_INLINE
+void
+btr_leaf_page_release(
+/*==================*/
+ page_t* page, /* in: page */
+ ulint latch_mode, /* in: BTR_SEARCH_LEAF or BTR_MODIFY_LEAF */
+ mtr_t* mtr); /* in: mtr */
+/******************************************************************
+Gets the child node file address in a node pointer. */
+UNIV_INLINE
+ulint
+btr_node_ptr_get_child_page_no(
+/*===========================*/
+ /* out: child node address */
+ rec_t* rec); /* in: node pointer record */
+/****************************************************************
+Creates the root node for a new index tree. */
+
+ulint
+btr_create(
+/*=======*/
+ /* out: page number of the created root, FIL_NULL if
+ did not succeed */
+ ulint type, /* in: type of the index */
+ ulint space, /* in: space where created */
+ dulint index_id,/* in: index id */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/****************************************************************
+Frees a B-tree except the root page, which MUST be freed after this
+by calling btr_free_root. */
+
+void
+btr_free_but_not_root(
+/*==================*/
+ ulint space, /* in: space where created */
+ ulint root_page_no); /* in: root page number */
+/****************************************************************
+Frees the B-tree root page. The rest of the tree MUST already have been freed. */
+
+void
+btr_free_root(
+/*==========*/
+ ulint space, /* in: space where created */
+ ulint root_page_no, /* in: root page number */
+ mtr_t* mtr); /* in: a mini-transaction which has already
+ been started */
+/*****************************************************************
+Makes the tree one level higher by splitting the root, and inserts
+the tuple. It is assumed that mtr contains an x-latch on the tree.
+NOTE that the operation of this function must always succeed,
+we cannot reverse it: therefore enough free disk space must be
+guaranteed to be available before this function is called. */
+
+rec_t*
+btr_root_raise_and_insert(
+/*======================*/
+ /* out: inserted record */
+ btr_cur_t* cursor, /* in: cursor at which to insert: must be
+ on the root page; when the function returns,
+ the cursor is positioned on the predecessor
+ of the inserted record */
+ dtuple_t* tuple, /* in: tuple to insert */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Reorganizes an index page. */
+
+void
+btr_page_reorganize(
+/*================*/
+ page_t* page, /* in: page to be reorganized */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Reorganizes an index page. */
+
+void
+btr_page_reorganize_low(
+/*====================*/
+ ibool low, /* in: TRUE if locks should not be updated, i.e.,
+ there cannot exist locks on the page */
+ page_t* page, /* in: page to be reorganized */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Decides if the page should be split at the convergence point of
+inserts converging to left. */
+
+ibool
+btr_page_get_split_rec_to_left(
+/*===========================*/
+ /* out: TRUE if split recommended */
+ btr_cur_t* cursor, /* in: cursor at which to insert */
+ rec_t** split_rec);/* out: if split recommended,
+ the first record on upper half page,
+ or NULL if tuple should be first */
+/*****************************************************************
+Decides if the page should be split at the convergence point of
+inserts converging to right. */
+
+ibool
+btr_page_get_split_rec_to_right(
+/*============================*/
+ /* out: TRUE if split recommended */
+ btr_cur_t* cursor, /* in: cursor at which to insert */
+ rec_t** split_rec);/* out: if split recommended,
+ the first record on upper half page,
+ or NULL if tuple should be first */
+/*****************************************************************
+Splits an index page to halves and inserts the tuple. It is assumed
+that mtr holds an x-latch to the index tree. NOTE: the tree x-latch
+is released within this function! NOTE that the operation of this
+function must always succeed, we cannot reverse it: therefore
+enough free disk space must be guaranteed to be available before
+this function is called. */
+
+rec_t*
+btr_page_split_and_insert(
+/*======================*/
+ /* out: inserted record; NOTE: the tree
+ x-latch is released! NOTE: 2 free disk
+ pages must be available! */
+ btr_cur_t* cursor, /* in: cursor at which to insert; when the
+ function returns, the cursor is positioned
+ on the predecessor of the inserted record */
+ dtuple_t* tuple, /* in: tuple to insert */
+ mtr_t* mtr); /* in: mtr */
+/***********************************************************
+Inserts a data tuple to a tree on a non-leaf level. It is assumed
+that mtr holds an x-latch on the tree. */
+
+void
+btr_insert_on_non_leaf_level(
+/*=========================*/
+ dict_tree_t* tree, /* in: tree */
+ ulint level, /* in: level, must be > 0 */
+ dtuple_t* tuple, /* in: the record to be inserted */
+ mtr_t* mtr); /* in: mtr */
+/********************************************************************
+Sets a record as the predefined minimum record. */
+
+void
+btr_set_min_rec_mark(
+/*=================*/
+ rec_t* rec, /* in: record */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Deletes on the upper level the node pointer to a page. */
+
+void
+btr_node_ptr_delete(
+/*================*/
+ dict_tree_t* tree, /* in: index tree */
+ page_t* page, /* in: page whose node pointer is deleted */
+ mtr_t* mtr); /* in: mtr */
+/****************************************************************
+Checks that the node pointer to a page is appropriate. */
+
+ibool
+btr_check_node_ptr(
+/*===============*/
+ /* out: TRUE */
+ dict_tree_t* tree, /* in: index tree */
+ page_t* page, /* in: index page */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Tries to merge the page first to the left immediate brother if such a
+brother exists, and the node pointers to the current page and to the
+brother reside on the same page. If the left brother does not satisfy these
+conditions, looks at the right brother. If the page is the only one on that
+level, lifts the records of the page to the father page, thus reducing the
+tree height. It is assumed that mtr holds an x-latch on the tree and on the
+page. If cursor is on the leaf level, mtr must also hold x-latches to
+the brothers, if they exist. NOTE: it is assumed that the caller has reserved
+enough free extents so that the compression will always succeed if done! */
+void
+btr_compress(
+/*=========*/
+ btr_cur_t* cursor, /* in: cursor on the page to merge or lift;
+ the page must not be empty: in record delete
+ use btr_discard_page if the page would become
+ empty */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Discards a page from a B-tree. This is used to remove the last record from
+a B-tree page: the whole page must be removed at the same time. This cannot
+be used for the root page, which is allowed to be empty. */
+
+void
+btr_discard_page(
+/*=============*/
+ btr_cur_t* cursor, /* in: cursor on the page to discard: not on
+ the root page */
+ mtr_t* mtr); /* in: mtr */
+/************************************************************************
+Declares the latching order level for the page latch in the debug version. */
+UNIV_INLINE
+void
+btr_declare_page_latch(
+/*===================*/
+ page_t* page, /* in: page */
+ ibool leaf); /* in: TRUE if a leaf */
+/********************************************************************
+Parses the redo log record for setting an index record as the predefined
+minimum record. */
+
+byte*
+btr_parse_set_min_rec_mark(
+/*=======================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
+/***************************************************************
+Parses a redo log record of reorganizing a page. */
+
+byte*
+btr_parse_page_reorganize(
+/*======================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
+/******************************************************************
+Gets the number of pages in a B-tree. */
+
+ulint
+btr_get_size(
+/*=========*/
+ /* out: number of pages */
+ dict_index_t* index, /* in: index */
+ ulint flag); /* in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
+/*****************************************************************
+Prints size info of a B-tree. */
+
+void
+btr_print_size(
+/*===========*/
+ dict_tree_t* tree); /* in: index tree */
+/******************************************************************
+Prints directories and other info of all nodes in the tree. */
+
+void
+btr_print_tree(
+/*===========*/
+ dict_tree_t* tree, /* in: tree */
+ ulint width); /* in: print this many entries from start
+ and end */
+/******************************************************************
+Checks the consistency of an index tree. */
+
+void
+btr_validate_tree(
+/*==============*/
+ dict_tree_t* tree); /* in: tree */
+
+#define BTR_N_LEAF_PAGES 1
+#define BTR_TOTAL_SIZE 2
+
+#ifndef UNIV_NONINL
+#include "btr0btr.ic"
+#endif
+
+#endif
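For orientation, a minimal caller-side sketch of the page accessors declared
above. This is editorial, not part of the patch: it assumes the surrounding
InnoDB environment (univ.i, a running buffer pool, a valid space id and page
number), and the helper name btr_page_level_sketch is made up for
illustration; mtr_start and mtr_commit are declared in mtr0mtr.h.

	#include "btr0btr.h"
	#include "mtr0mtr.h"

	static ulint
	btr_page_level_sketch(ulint space, ulint page_no)
	{
		mtr_t	mtr;
		page_t*	page;
		ulint	level;

		mtr_start(&mtr);

		/* Fetch the page s-latched: BTR_SEARCH_LEAF maps to
		RW_S_LATCH */
		page = btr_page_get(space, page_no, BTR_SEARCH_LEAF, &mtr);

		level = btr_page_get_level(page, &mtr);	/* leaf level == 0 */

		/* Committing the mtr releases the page latch and the
		buffer fix */
		mtr_commit(&mtr);

		return(level);
	}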
diff --git a/innobase/include/btr0btr.ic b/innobase/include/btr0btr.ic
new file mode 100644
index 00000000000..5c1c89e9840
--- /dev/null
+++ b/innobase/include/btr0btr.ic
@@ -0,0 +1,223 @@
+/******************************************************
+The B-tree
+
+(c) 1994-1996 Innobase Oy
+
+Created 6/2/1994 Heikki Tuuri
+*******************************************************/
+
+#include "mach0data.h"
+#include "mtr0mtr.h"
+#include "mtr0log.h"
+
+#define BTR_MAX_NODE_LEVEL 50 /* used in debug checking */
+
+/******************************************************************
+Gets a buffer page and declares its latching order level. */
+UNIV_INLINE
+page_t*
+btr_page_get(
+/*=========*/
+ ulint space, /* in: space id */
+ ulint page_no, /* in: page number */
+ ulint mode, /* in: latch mode */
+ mtr_t* mtr) /* in: mtr */
+{
+ page_t* page;
+
+ page = buf_page_get(space, page_no, mode, mtr);
+#ifdef UNIV_SYNC_DEBUG
+ if (mode != RW_NO_LATCH) {
+
+ buf_page_dbg_add_level(page, SYNC_TREE_NODE);
+ }
+#endif
+ return(page);
+}
+
+/******************************************************************
+Sets the index id field of a page. */
+UNIV_INLINE
+void
+btr_page_set_index_id(
+/*==================*/
+ page_t* page, /* in: page to be created */
+ dulint id, /* in: index id */
+ mtr_t* mtr) /* in: mtr */
+{
+ mlog_write_dulint(page + PAGE_HEADER + PAGE_INDEX_ID, id,
+ MLOG_8BYTES, mtr);
+}
+
+/******************************************************************
+Gets the index id field of a page. */
+UNIV_INLINE
+dulint
+btr_page_get_index_id(
+/*==================*/
+ /* out: index id */
+ page_t* page) /* in: index page */
+{
+ return(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID));
+}
+
+/************************************************************
+Gets the node level field in an index page. */
+UNIV_INLINE
+ulint
+btr_page_get_level_low(
+/*===================*/
+ /* out: level, leaf level == 0 */
+ page_t* page) /* in: index page */
+{
+ ulint level;
+
+ ut_ad(page);
+
+ level = mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL);
+
+ ut_ad(level <= BTR_MAX_NODE_LEVEL);
+
+ return(level);
+}
+
+/************************************************************
+Gets the node level field in an index page. */
+UNIV_INLINE
+ulint
+btr_page_get_level(
+/*===============*/
+ /* out: level, leaf level == 0 */
+ page_t* page, /* in: index page */
+ mtr_t* mtr) /* in: mini-transaction handle */
+{
+ ut_ad(page && mtr);
+
+ return(btr_page_get_level_low(page));
+}
+
+/************************************************************
+Sets the node level field in an index page. */
+UNIV_INLINE
+void
+btr_page_set_level(
+/*===============*/
+ page_t* page, /* in: index page */
+ ulint level, /* in: level, leaf level == 0 */
+ mtr_t* mtr) /* in: mini-transaction handle */
+{
+ ut_ad(page && mtr);
+ ut_ad(level <= BTR_MAX_NODE_LEVEL);
+
+ mlog_write_ulint(page + PAGE_HEADER + PAGE_LEVEL, level,
+ MLOG_2BYTES, mtr);
+}
+
+/************************************************************
+Gets the next index page number. */
+UNIV_INLINE
+ulint
+btr_page_get_next(
+/*==============*/
+ /* out: next page number */
+ page_t* page, /* in: index page */
+ mtr_t* mtr) /* in: mini-transaction handle */
+{
+ ut_ad(page && mtr);
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_X_FIX)
+ || mtr_memo_contains(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_S_FIX));
+
+ return(mach_read_from_4(page + FIL_PAGE_NEXT));
+}
+
+/************************************************************
+Sets the next index page field. */
+UNIV_INLINE
+void
+btr_page_set_next(
+/*==============*/
+ page_t* page, /* in: index page */
+ ulint next, /* in: next page number */
+ mtr_t* mtr) /* in: mini-transaction handle */
+{
+ ut_ad(page && mtr);
+
+ mlog_write_ulint(page + FIL_PAGE_NEXT, next, MLOG_4BYTES, mtr);
+}
+
+/************************************************************
+Gets the previous index page number. */
+UNIV_INLINE
+ulint
+btr_page_get_prev(
+/*==============*/
+ /* out: prev page number */
+ page_t* page, /* in: index page */
+ mtr_t* mtr) /* in: mini-transaction handle */
+{
+ ut_ad(page && mtr);
+
+ return(mach_read_from_4(page + FIL_PAGE_PREV));
+}
+
+/************************************************************
+Sets the previous index page field. */
+UNIV_INLINE
+void
+btr_page_set_prev(
+/*==============*/
+ page_t* page, /* in: index page */
+ ulint prev, /* in: previous page number */
+ mtr_t* mtr) /* in: mini-transaction handle */
+{
+ ut_ad(page && mtr);
+
+ mlog_write_ulint(page + FIL_PAGE_PREV, prev, MLOG_4BYTES, mtr);
+}
+
+/******************************************************************
+Gets the child node file address in a node pointer. */
+UNIV_INLINE
+ulint
+btr_node_ptr_get_child_page_no(
+/*===========================*/
+ /* out: child node address */
+ rec_t* rec) /* in: node pointer record */
+{
+ ulint n_fields;
+ byte* field;
+ ulint len;
+
+ n_fields = rec_get_n_fields(rec);
+
+ /* The child address is in the last field */
+ field = rec_get_nth_field(rec, n_fields - 1, &len);
+
+ ut_ad(len == 4);
+
+ return(mach_read_from_4(field));
+}
+
+/******************************************************************
+Releases the latch on a leaf page and buffer-unfixes it. */
+UNIV_INLINE
+void
+btr_leaf_page_release(
+/*==================*/
+ page_t* page, /* in: page */
+ ulint latch_mode, /* in: BTR_SEARCH_LEAF or BTR_MODIFY_LEAF */
+ mtr_t* mtr) /* in: mtr */
+{
+ ut_ad(!mtr_memo_contains(mtr, buf_block_align(page),
+ MTR_MEMO_MODIFY));
+ if (latch_mode == BTR_SEARCH_LEAF) {
+ mtr_memo_release(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_S_FIX);
+ } else {
+ ut_ad(latch_mode == BTR_MODIFY_LEAF);
+ mtr_memo_release(mtr, buf_block_align(page),
+ MTR_MEMO_PAGE_X_FIX);
+ }
+}
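The inline functions above read page header fields with mach_read_from_2 and
mach_read_from_4 (declared in mach0data.h, not shown in this section); these
fields are stored most-significant-byte first, independent of the host byte
order. A self-contained sketch of the equivalent 4-byte read, outside the
InnoDB environment:

	#include <stdio.h>

	typedef unsigned long int ulint;

	/* Equivalent of mach_read_from_4: big-endian, MSB first */
	static ulint
	read_from_4(const unsigned char* b)
	{
		return(((ulint) b[0] << 24) | ((ulint) b[1] << 16)
		       | ((ulint) b[2] << 8) | (ulint) b[3]);
	}

	int
	main(void)
	{
		/* A next-page field (cf. FIL_PAGE_NEXT) holding page
		number 7 */
		unsigned char field[4] = {0x00, 0x00, 0x00, 0x07};

		printf("%lu\n", read_from_4(field));	/* prints 7 */

		return(0);
	}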
diff --git a/innobase/include/btr0cur.h b/innobase/include/btr0cur.h
new file mode 100644
index 00000000000..79ec56c8e50
--- /dev/null
+++ b/innobase/include/btr0cur.h
@@ -0,0 +1,519 @@
+/******************************************************
+The index tree cursor
+
+(c) 1994-1996 Innobase Oy
+
+Created 10/16/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef btr0cur_h
+#define btr0cur_h
+
+#include "univ.i"
+#include "dict0dict.h"
+#include "data0data.h"
+#include "page0cur.h"
+#include "btr0types.h"
+#include "que0types.h"
+#include "row0types.h"
+#include "ha0ha.h"
+
+/* Mode flags for btr_cur operations; these can be ORed */
+#define BTR_NO_UNDO_LOG_FLAG 1 /* do no undo logging */
+#define BTR_NO_LOCKING_FLAG 2 /* do no record lock checking */
+#define BTR_KEEP_SYS_FLAG 4 /* sys fields will be found from the
+ update vector or inserted entry */
+
+#define BTR_CUR_ADAPT
+#define BTR_CUR_HASH_ADAPT
+
+/*************************************************************
+Returns the page cursor component of a tree cursor. */
+UNIV_INLINE
+page_cur_t*
+btr_cur_get_page_cur(
+/*=================*/
+ /* out: pointer to page cursor component */
+ btr_cur_t* cursor); /* in: tree cursor */
+/*************************************************************
+Returns the record pointer of a tree cursor. */
+UNIV_INLINE
+rec_t*
+btr_cur_get_rec(
+/*============*/
+ /* out: pointer to record */
+ btr_cur_t* cursor); /* in: tree cursor */
+/*************************************************************
+Invalidates a tree cursor by setting record pointer to NULL. */
+UNIV_INLINE
+void
+btr_cur_invalidate(
+/*===============*/
+ btr_cur_t* cursor); /* in: tree cursor */
+/*************************************************************
+Returns the page of a tree cursor. */
+UNIV_INLINE
+page_t*
+btr_cur_get_page(
+/*=============*/
+ /* out: pointer to page */
+ btr_cur_t* cursor); /* in: tree cursor */
+/*************************************************************
+Returns the tree of a cursor. */
+UNIV_INLINE
+dict_tree_t*
+btr_cur_get_tree(
+/*=============*/
+ /* out: tree */
+ btr_cur_t* cursor); /* in: tree cursor */
+/*************************************************************
+Positions a tree cursor at a given record. */
+UNIV_INLINE
+void
+btr_cur_position(
+/*=============*/
+ dict_index_t* index, /* in: index */
+ rec_t* rec, /* in: record in tree */
+ btr_cur_t* cursor);/* in: cursor */
+/************************************************************************
+Searches an index tree and positions a tree cursor on a given level.
+NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
+to node pointer page number fields on the upper levels of the tree!
+Note that if mode is PAGE_CUR_LE, which is used in inserts, then
+cursor->up_match and cursor->low_match both will have sensible values.
+If mode is PAGE_CUR_GE, then up_match will have a sensible value. */
+
+void
+btr_cur_search_to_nth_level(
+/*========================*/
+ dict_index_t* index, /* in: index */
+ ulint level, /* in: the tree level of search */
+ dtuple_t* tuple, /* in: data tuple; NOTE: n_fields_cmp in
+ tuple must be set so that it cannot get
+ compared to the node ptr page number field! */
+ ulint mode, /* in: PAGE_CUR_L, ...;
+ NOTE that if the search is made using a unique
+ prefix of a record, mode should be PAGE_CUR_LE,
+ not PAGE_CUR_GE, as the latter may end up on
+ the previous page of the record! Inserts
+ should always be made using PAGE_CUR_LE to
+ search the position! */
+ ulint latch_mode, /* in: BTR_SEARCH_LEAF, ...;
+ cursor->left_page is used to store a pointer
+ to the left neighbor page, in the cases
+ BTR_SEARCH_PREV and BTR_MODIFY_PREV */
+ btr_cur_t* cursor, /* out: tree cursor; the cursor page is s- or
+ x-latched */
+ ulint has_search_latch,/* in: latch mode the caller
+ currently has on btr_search_latch:
+ RW_S_LATCH, or 0 */
+ mtr_t* mtr); /* in: mtr */
+/*********************************************************************
+Opens a cursor at either end of an index. */
+
+void
+btr_cur_open_at_index_side(
+/*=======================*/
+ ibool from_left, /* in: TRUE if open to the low end,
+ FALSE if to the high end */
+ dict_index_t* index, /* in: index */
+ ulint latch_mode, /* in: latch mode */
+ btr_cur_t* cursor, /* in: cursor */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************************
+Positions a cursor at a randomly chosen position within a B-tree. */
+
+void
+btr_cur_open_at_rnd_pos(
+/*====================*/
+ dict_index_t* index, /* in: index */
+ ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
+ btr_cur_t* cursor, /* in/out: B-tree cursor */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Tries to perform an insert to a page in an index tree, next to cursor.
+It is assumed that mtr holds an x-latch on the page. The operation does
+not succeed if there is too little space on the page. If there is just
+one record on the page, the insert will always succeed; this is to
+prevent trying to split a page with just one record. */
+
+ulint
+btr_cur_optimistic_insert(
+/*======================*/
+ /* out: DB_SUCCESS, DB_WAIT_LOCK,
+ DB_FAIL, or error number */
+ ulint flags, /* in: undo logging and locking flags: if not
+ zero, the parameters index and thr should be
+ specified */
+ btr_cur_t* cursor, /* in: cursor on page after which
+ to insert; cursor stays valid */
+ dtuple_t* entry, /* in: entry to insert */
+ rec_t** rec, /* out: pointer to inserted record if
+ succeed */
+ que_thr_t* thr, /* in: query thread or NULL */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Performs an insert on a page of an index tree. It is assumed that mtr
+holds an x-latch on the tree and on the cursor page. If the insert is
+made on the leaf level, to avoid deadlocks, mtr must also own x-latches
+to brothers of page, if those brothers exist. */
+
+ulint
+btr_cur_pessimistic_insert(
+/*=======================*/
+ /* out: DB_SUCCESS or error number */
+ ulint flags, /* in: undo logging and locking flags: if not
+ zero, the parameters index and thr should be
+ specified */
+ btr_cur_t* cursor, /* in: cursor after which to insert;
+ cursor does not stay valid */
+ dtuple_t* entry, /* in: entry to insert */
+ rec_t** rec, /* out: pointer to inserted record if
+ succeed */
+ que_thr_t* thr, /* in: query thread or NULL */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Updates a record when the update causes no size changes in its fields. */
+
+ulint
+btr_cur_update_in_place(
+/*====================*/
+ /* out: DB_SUCCESS or error number */
+ ulint flags, /* in: undo logging and locking flags */
+ btr_cur_t* cursor, /* in: cursor on the record to update;
+ cursor stays valid and positioned on the
+ same record */
+ upd_t* update, /* in: update vector */
+ ulint cmpl_info,/* in: compiler info on secondary index
+ updates */
+ que_thr_t* thr, /* in: query thread */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Tries to update a record on a page in an index tree. It is assumed that mtr
+holds an x-latch on the page. The operation does not succeed if there is too
+little space on the page or if the update would result in too empty a page,
+so that tree compression is recommended. */
+
+ulint
+btr_cur_optimistic_update(
+/*======================*/
+ /* out: DB_SUCCESS, or DB_OVERFLOW if the
+ updated record does not fit, DB_UNDERFLOW
+ if the page would become too empty */
+ ulint flags, /* in: undo logging and locking flags */
+ btr_cur_t* cursor, /* in: cursor on the record to update;
+ cursor stays valid and positioned on the
+ same record */
+ upd_t* update, /* in: update vector; this must also
+ contain trx id and roll ptr fields */
+ ulint cmpl_info,/* in: compiler info on secondary index
+ updates */
+ que_thr_t* thr, /* in: query thread */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Performs an update of a record on a page of a tree. It is assumed
+that mtr holds an x-latch on the tree and on the cursor page. If the
+update is made on the leaf level, to avoid deadlocks, mtr must also
+own x-latches to brothers of page, if those brothers exist. */
+
+ulint
+btr_cur_pessimistic_update(
+/*=======================*/
+ /* out: DB_SUCCESS or error code */
+ ulint flags, /* in: undo logging, locking, and rollback
+ flags */
+ btr_cur_t* cursor, /* in: cursor on the record to update;
+ cursor does not stay valid */
+ upd_t* update, /* in: update vector; this is allowed to also
+ contain trx id and roll ptr fields, but
+ the values in update vector have no effect */
+ ulint cmpl_info,/* in: compiler info on secondary index
+ updates */
+ que_thr_t* thr, /* in: query thread */
+ mtr_t* mtr); /* in: mtr */
+/***************************************************************
+Marks a clustered index record deleted. Writes an undo log record to
+undo log on this delete marking. Writes in the trx id field the id
+of the deleting transaction, and in the roll ptr field pointer to the
+undo log record created. */
+
+ulint
+btr_cur_del_mark_set_clust_rec(
+/*===========================*/
+ /* out: DB_SUCCESS, DB_LOCK_WAIT, or error
+ number */
+ ulint flags, /* in: undo logging and locking flags */
+ btr_cur_t* cursor, /* in: cursor */
+ ibool val, /* in: value to set */
+ que_thr_t* thr, /* in: query thread */
+ mtr_t* mtr); /* in: mtr */
+/***************************************************************
+Sets a secondary index record delete mark to TRUE or FALSE. */
+
+ulint
+btr_cur_del_mark_set_sec_rec(
+/*=========================*/
+ /* out: DB_SUCCESS, DB_LOCK_WAIT, or error
+ number */
+ ulint flags, /* in: locking flag */
+ btr_cur_t* cursor, /* in: cursor */
+ ibool val, /* in: value to set */
+ que_thr_t* thr, /* in: query thread */
+ mtr_t* mtr); /* in: mtr */
+/***************************************************************
+Sets a secondary index record delete mark to FALSE. This function is
+only used by the insert buffer insert merge mechanism. */
+
+void
+btr_cur_del_unmark_for_ibuf(
+/*========================*/
+ rec_t* rec, /* in: record to delete unmark */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Tries to compress a page of the tree on the leaf level. It is assumed
+that mtr holds an x-latch on the tree and on the cursor page. To avoid
+deadlocks, mtr must also own x-latches to brothers of page, if those
+brothers exist. NOTE: it is assumed that the caller has reserved enough
+free extents so that the compression will always succeed if done! */
+
+void
+btr_cur_compress(
+/*=============*/
+ btr_cur_t* cursor, /* in: cursor on the page to compress;
+ cursor does not stay valid */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Tries to compress a page of the tree if it seems useful. It is assumed
+that mtr holds an x-latch on the tree and on the cursor page. To avoid
+deadlocks, mtr must also own x-latches to brothers of page, if those
+brothers exist. NOTE: it is assumed that the caller has reserved enough
+free extents so that the compression will always succeed if done! */
+
+ibool
+btr_cur_compress_if_useful(
+/*=======================*/
+ /* out: TRUE if compression occurred */
+ btr_cur_t* cursor, /* in: cursor on the page to compress;
+ cursor does not stay valid if compression
+ occurs */
+ mtr_t* mtr); /* in: mtr */
+/***********************************************************
+Removes the record on which the tree cursor is positioned. It is assumed
+that the mtr has an x-latch on the page where the cursor is positioned,
+but no latch on the whole tree. */
+
+ibool
+btr_cur_optimistic_delete(
+/*======================*/
+ /* out: TRUE if success, i.e., the page
+ did not become too empty */
+ btr_cur_t* cursor, /* in: cursor on the record to delete;
+ cursor stays valid: if deletion succeeds,
+ on function exit it points to the successor
+ of the deleted record */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Removes the record on which the tree cursor is positioned. Tries
+to compress the page if its fillfactor drops below a threshold
+or if it is the only page on the level. It is assumed that mtr holds
+an x-latch on the tree and on the cursor page. To avoid deadlocks,
+mtr must also own x-latches to brothers of page, if those brothers
+exist. */
+
+ibool
+btr_cur_pessimistic_delete(
+/*=======================*/
+ /* out: TRUE if compression occurred */
+ ulint* err, /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
+ the latter may occur because we may have
+ to update node pointers on upper levels,
+ and in the case of variable length keys
+ these may actually grow in size */
+ ibool has_reserved_extents, /* in: TRUE if the
+ caller has already reserved enough free
+ extents so that he knows that the operation
+ will succeed */
+ btr_cur_t* cursor, /* in: cursor on the record to delete;
+ if compression does not occur, the cursor
+ stays valid: it points to successor of
+ deleted record on function exit */
+ mtr_t* mtr); /* in: mtr */
+/***************************************************************
+Parses a redo log record of updating a record in-place. */
+
+byte*
+btr_cur_parse_update_in_place(
+/*==========================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page); /* in: page or NULL */
+/***************************************************************
+Parses a redo log record of updating a record, but not in-place. */
+
+byte*
+btr_cur_parse_opt_update(
+/*=====================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
+/********************************************************************
+Parses the redo log record for delete marking or unmarking of a clustered
+index record. */
+
+byte*
+btr_cur_parse_del_mark_set_clust_rec(
+/*=================================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page); /* in: page or NULL */
+/********************************************************************
+Parses the redo log record for delete marking or unmarking of a secondary
+index record. */
+
+byte*
+btr_cur_parse_del_mark_set_sec_rec(
+/*===============================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page); /* in: page or NULL */
+/***********************************************************************
+Estimates the number of rows in a given index range. */
+
+ulint
+btr_estimate_n_rows_in_range(
+/*=========================*/
+ /* out: estimated number of rows */
+ dict_index_t* index, /* in: index */
+ dtuple_t* tuple1, /* in: range start, may also be empty tuple */
+ ulint mode1, /* in: search mode for range start */
+ dtuple_t* tuple2, /* in: range end, may also be empty tuple */
+ ulint mode2); /* in: search mode for range end */
+/***********************************************************************
+Estimates the number of different key values in a given index. */
+
+ulint
+btr_estimate_number_of_different_key_vals(
+/*======================================*/
+ /* out: estimated number of key values */
+ dict_index_t* index); /* in: index */
+
+
+/*######################################################################*/
+
+/* In the pessimistic delete, if the page data size drops below this
+limit, merging it to a neighbor is tried */
+
+#define BTR_CUR_PAGE_COMPRESS_LIMIT (UNIV_PAGE_SIZE / 2)
+
+/* A slot in the path array. We store here info on a search path down the
+tree. Each slot contains data on a single level of the tree. */
+
+typedef struct btr_path_struct btr_path_t;
+struct btr_path_struct{
+ ulint nth_rec; /* index of the record
+ where the page cursor stopped on
+ this level (index in alphabetical
+ order); value ULINT_UNDEFINED
+ denotes array end */
+ ulint n_recs; /* number of records on the page */
+};
+
+#define BTR_PATH_ARRAY_N_SLOTS 250 /* size of path array (in slots) */
+
+/* The tree cursor: the definition appears here only for the compiler
+to know struct size! */
+
+struct btr_cur_struct {
+ dict_index_t* index; /* index where positioned */
+ page_cur_t page_cur; /* page cursor */
+ page_t* left_page; /* this field is used to store a pointer
+ to the left neighbor page, in the cases
+ BTR_SEARCH_PREV and BTR_MODIFY_PREV */
+ /*------------------------------*/
+ que_thr_t* thr; /* this field is only used when
+ btr_cur_search_... is called for an
+ index entry insertion: the calling
+ query thread is passed here to be
+ used in the insert buffer */
+ /*------------------------------*/
+ /* The following fields are used in btr_cur_search... to pass
+ information: */
+ ulint flag; /* BTR_CUR_HASH, BTR_CUR_HASH_FAIL,
+ BTR_CUR_BINARY, or
+ BTR_CUR_INSERT_TO_IBUF */
+ ulint tree_height; /* Tree height if the search is done
+ for a pessimistic insert or update
+ operation */
+ ulint up_match; /* If the search mode was PAGE_CUR_LE,
+ the number of matched fields to
+ the first user record to the right of
+ the cursor record after
+ btr_cur_search_...;
+ for the mode PAGE_CUR_GE, the matched
+ fields to the first user record AT THE
+ CURSOR or to the right of it;
+ NOTE that the up_match and low_match
+ values may exceed the correct values
+ for comparison to the adjacent user
+ record if that record is on a
+ different leaf page! (See the note in
+ row_ins_duplicate_key.) */
+ ulint up_bytes; /* number of matched bytes to the
+ right at the time cursor positioned;
+ only used internally in searches: not
+ defined after the search */
+ ulint low_match; /* if search mode was PAGE_CUR_LE,
+ the number of matched fields to the
+ first user record AT THE CURSOR or
+ to the left of it after
+ btr_cur_search_...;
+ NOT defined for PAGE_CUR_GE or any
+ other search modes; see also the NOTE
+ in up_match! */
+ ulint low_bytes; /* number of matched bytes to the
+ right at the time cursor positioned;
+ only used internally in searches: not
+ defined after the search */
+ ulint n_fields; /* prefix length used in a hash
+ search if hash_node != NULL */
+ ulint n_bytes; /* hash prefix bytes if hash_node !=
+ NULL */
+ ulint fold; /* fold value used in the search if
+ flag is BTR_CUR_HASH */
+ /*------------------------------*/
+ btr_path_t* path_arr; /* in estimating the number of
+ rows in range, we store in this array
+ information of the path through
+ the tree */
+};
+
+/* Values for the flag documenting the used search method */
+#define BTR_CUR_HASH 1 /* successful shortcut using the hash
+ index */
+#define BTR_CUR_HASH_FAIL 2 /* failure using hash, success using
+ binary search: the misleading hash
+ reference is stored in the field
+ hash_node, and might be necessary to
+ update */
+#define BTR_CUR_BINARY 3 /* success using the binary search */
+#define BTR_CUR_INSERT_TO_IBUF 4 /* performed the intended insert to
+ the insert buffer */
+
+/* If pessimistic delete fails because of lack of file space,
+there is still a good chance of success a little later: try this many times,
+and sleep this many microseconds in between */
+#define BTR_CUR_RETRY_DELETE_N_TIMES 100
+#define BTR_CUR_RETRY_SLEEP_TIME 50000
+
+extern ulint btr_cur_n_non_sea;
+
+#ifndef UNIV_NONINL
+#include "btr0cur.ic"
+#endif
+
+#endif
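The retry constants above suggest a caller-side pattern that is not shown in
this header. A sketch, editorial only: delete_rec_once is a made-up
placeholder for code that positions a btr cursor and calls
btr_cur_pessimistic_delete, and os_thread_sleep is assumed to be the
microsecond sleep declared in os0thread.h.

	#include "btr0cur.h"
	#include "db0err.h"
	#include "os0thread.h"

	/* Placeholder, not part of the patch: positions a cursor, calls
	btr_cur_pessimistic_delete, and returns DB_SUCCESS or
	DB_OUT_OF_FILE_SPACE */
	extern ulint delete_rec_once(void);

	static ulint
	delete_with_retry_sketch(void)
	{
		ulint	err;
		ulint	n_tries = 0;

		for (;;) {
			err = delete_rec_once();

			if (err == DB_SUCCESS
			    || n_tries >= BTR_CUR_RETRY_DELETE_N_TIMES) {

				return(err);
			}

			/* Lack of file space may clear up a little later:
			sleep and retry */
			os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);

			n_tries++;
		}
	}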
diff --git a/innobase/include/btr0cur.ic b/innobase/include/btr0cur.ic
new file mode 100644
index 00000000000..a3a04b60c45
--- /dev/null
+++ b/innobase/include/btr0cur.ic
@@ -0,0 +1,172 @@
+/******************************************************
+The index tree cursor
+
+(c) 1994-1996 Innobase Oy
+
+Created 10/16/1994 Heikki Tuuri
+*******************************************************/
+
+#include "btr0btr.h"
+
+/*************************************************************
+Returns the page cursor component of a tree cursor. */
+UNIV_INLINE
+page_cur_t*
+btr_cur_get_page_cur(
+/*=================*/
+ /* out: pointer to page cursor component */
+ btr_cur_t* cursor) /* in: tree cursor */
+{
+ return(&(cursor->page_cur));
+}
+
+/*************************************************************
+Returns the record pointer of a tree cursor. */
+UNIV_INLINE
+rec_t*
+btr_cur_get_rec(
+/*============*/
+ /* out: pointer to record */
+ btr_cur_t* cursor) /* in: tree cursor */
+{
+ return(page_cur_get_rec(&(cursor->page_cur)));
+}
+
+/*************************************************************
+Invalidates a tree cursor by setting record pointer to NULL. */
+UNIV_INLINE
+void
+btr_cur_invalidate(
+/*===============*/
+ btr_cur_t* cursor) /* in: tree cursor */
+{
+ page_cur_invalidate(&(cursor->page_cur));
+}
+
+/*************************************************************
+Returns the page of a tree cursor. */
+UNIV_INLINE
+page_t*
+btr_cur_get_page(
+/*=============*/
+ /* out: pointer to page */
+ btr_cur_t* cursor) /* in: tree cursor */
+{
+ return(buf_frame_align(page_cur_get_rec(&(cursor->page_cur))));
+}
+
+/*************************************************************
+Returns the tree of a cursor. */
+UNIV_INLINE
+dict_tree_t*
+btr_cur_get_tree(
+/*=============*/
+ /* out: tree */
+ btr_cur_t* cursor) /* in: tree cursor */
+{
+ return((cursor->index)->tree);
+}
+
+/*************************************************************
+Positions a tree cursor at a given record. */
+UNIV_INLINE
+void
+btr_cur_position(
+/*=============*/
+ dict_index_t* index, /* in: index */
+ rec_t* rec, /* in: record in tree */
+ btr_cur_t* cursor) /* in: cursor */
+{
+ page_cur_position(rec, btr_cur_get_page_cur(cursor));
+
+ cursor->index = index;
+}
+
+/*************************************************************************
+Checks if compressing an index page where a btr cursor is placed makes
+sense. */
+UNIV_INLINE
+ibool
+btr_cur_compress_recommendation(
+/*============================*/
+ /* out: TRUE if compression is recommended */
+ btr_cur_t* cursor, /* in: btr cursor */
+ mtr_t* mtr) /* in: mtr */
+{
+ page_t* page;
+
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(
+ btr_cur_get_page(cursor)),
+ MTR_MEMO_PAGE_X_FIX));
+
+ page = btr_cur_get_page(cursor);
+
+ if ((page_get_data_size(page) < BTR_CUR_PAGE_COMPRESS_LIMIT)
+ || ((btr_page_get_next(page, mtr) == FIL_NULL)
+ && (btr_page_get_prev(page, mtr) == FIL_NULL))) {
+
+ /* The page fillfactor has dropped below a predefined
+ minimum value OR the level in the B-tree contains just
+ one page: we recommend compression if this is not the
+ root page. */
+
+ if (dict_tree_get_page((cursor->index)->tree)
+ == buf_frame_get_page_no(page)) {
+
+ /* It is the root page */
+
+ return(FALSE);
+ }
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/*************************************************************************
+Checks if the record on which the cursor is placed can be deleted without
+making tree compression necessary (or, recommended). */
+UNIV_INLINE
+ibool
+btr_cur_can_delete_without_compress(
+/*================================*/
+ /* out: TRUE if can be deleted without
+ recommended compression */
+ btr_cur_t* cursor, /* in: btr cursor */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint rec_size;
+ page_t* page;
+
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(
+ btr_cur_get_page(cursor)),
+ MTR_MEMO_PAGE_X_FIX));
+
+ rec_size = rec_get_size(btr_cur_get_rec(cursor));
+
+ page = btr_cur_get_page(cursor);
+
+ if ((page_get_data_size(page) - rec_size < BTR_CUR_PAGE_COMPRESS_LIMIT)
+ || ((btr_page_get_next(page, mtr) == FIL_NULL)
+ && (btr_page_get_prev(page, mtr) == FIL_NULL))
+ || (page_get_n_recs(page) < 2)) {
+
+ /* The page fillfactor will drop below a predefined
+ minimum value, OR the level in the B-tree contains just
+ one page, OR the page will become empty: we recommend
+ compression if this is not the root page. */
+
+ if (dict_tree_get_page((cursor->index)->tree)
+ == buf_frame_get_page_no(page)) {
+
+ /* It is the root page */
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+ }
+
+ return(TRUE);
+}
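The two recommendation functions above come down to threshold arithmetic on
the page data size. A standalone sketch of that arithmetic, assuming the
customary 16 kB UNIV_PAGE_SIZE (the real value comes from univ.i, not this
file), so BTR_CUR_PAGE_COMPRESS_LIMIT works out to 8192 bytes:

	#include <stdio.h>

	#define UNIV_PAGE_SIZE			16384	/* assumed; see univ.i */
	#define BTR_CUR_PAGE_COMPRESS_LIMIT	(UNIV_PAGE_SIZE / 2)

	/* Mirrors the size test in btr_cur_compress_recommendation:
	merging is recommended once the user data on a non-root page drops
	below half a page, or the page is alone on its level */
	static int
	compress_recommended(unsigned long data_size, int only_page_on_level)
	{
		return(data_size < BTR_CUR_PAGE_COMPRESS_LIMIT
		       || only_page_on_level);
	}

	int
	main(void)
	{
		printf("limit: %d bytes\n", BTR_CUR_PAGE_COMPRESS_LIMIT);
		printf("9000 bytes -> %d\n", compress_recommended(9000, 0)); /* 0 */
		printf("8000 bytes -> %d\n", compress_recommended(8000, 0)); /* 1 */

		return(0);
	}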
diff --git a/innobase/include/btr0pcur.h b/innobase/include/btr0pcur.h
new file mode 100644
index 00000000000..c07d5199d8c
--- /dev/null
+++ b/innobase/include/btr0pcur.h
@@ -0,0 +1,486 @@
+/******************************************************
+The index tree persistent cursor
+
+(c) 1996 Innobase Oy
+
+Created 2/23/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef btr0pcur_h
+#define btr0pcur_h
+
+#include "univ.i"
+#include "dict0dict.h"
+#include "data0data.h"
+#include "mtr0mtr.h"
+#include "page0cur.h"
+#include "btr0cur.h"
+#include "btr0btr.h"
+#include "btr0types.h"
+
+/* Relative positions for a stored cursor position */
+#define BTR_PCUR_ON 1
+#define BTR_PCUR_BEFORE 2
+#define BTR_PCUR_AFTER 3
+
+/******************************************************************
+Allocates memory for a persistent cursor object and initializes the cursor. */
+
+btr_pcur_t*
+btr_pcur_create_for_mysql(void);
+/*============================*/
+ /* out, own: persistent cursor */
+/******************************************************************
+Frees the memory for a persistent cursor object. */
+
+void
+btr_pcur_free_for_mysql(
+/*====================*/
+ btr_pcur_t* cursor); /* in, own: persistent cursor */
+/******************************************************************
+Copies the stored position of a pcur to another pcur. */
+
+void
+btr_pcur_copy_stored_position(
+/*==========================*/
+ btr_pcur_t* pcur_receive, /* in: pcur which will receive the
+ position info */
+ btr_pcur_t* pcur_donate); /* in: pcur from which the info is
+ copied */
+/******************************************************************
+Sets the old_rec_buf field to NULL. */
+UNIV_INLINE
+void
+btr_pcur_init(
+/*==========*/
+ btr_pcur_t* pcur); /* in: persistent cursor */
+/******************************************************************
+Initializes and opens a persistent cursor to an index tree. It should be
+closed with btr_pcur_close. */
+UNIV_INLINE
+void
+btr_pcur_open(
+/*==========*/
+ dict_index_t* index, /* in: index */
+ dtuple_t* tuple, /* in: tuple on which search done */
+ ulint mode, /* in: PAGE_CUR_L, ...;
+ NOTE that if the search is made using a unique
+ prefix of a record, mode should be
+ PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
+ may end up on the previous page from the
+ record! */
+ ulint latch_mode,/* in: BTR_SEARCH_LEAF, ... */
+ btr_pcur_t* cursor, /* in: memory buffer for persistent cursor */
+ mtr_t* mtr); /* in: mtr */
+/******************************************************************
+Opens a persistent cursor to an index tree without initializing the
+cursor. */
+UNIV_INLINE
+void
+btr_pcur_open_with_no_init(
+/*=======================*/
+ dict_index_t* index, /* in: index */
+ dtuple_t* tuple, /* in: tuple on which search done */
+ ulint mode, /* in: PAGE_CUR_L, ...;
+ NOTE that if the search is made using a unique
+ prefix of a record, mode should be
+ PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
+ may end up on the previous page of the
+ record! */
+ ulint latch_mode,/* in: BTR_SEARCH_LEAF, ... */
+ btr_pcur_t* cursor, /* in: memory buffer for persistent cursor */
+ ulint has_search_latch,/* in: latch mode the caller
+ currently has on btr_search_latch:
+ RW_S_LATCH, or 0 */
+ mtr_t* mtr); /* in: mtr */
+/*********************************************************************
+Opens a persistent cursor at either end of an index. */
+UNIV_INLINE
+void
+btr_pcur_open_at_index_side(
+/*========================*/
+ ibool from_left, /* in: TRUE if open to the low end,
+ FALSE if to the high end */
+ dict_index_t* index, /* in: index */
+ ulint latch_mode, /* in: latch mode */
+ btr_pcur_t* pcur, /* in: cursor */
+ ibool do_init, /* in: TRUE if should be initialized */
+ mtr_t* mtr); /* in: mtr */
+/******************************************************************
+Gets the up_match value for a pcur after a search. */
+UNIV_INLINE
+ulint
+btr_pcur_get_up_match(
+/*==================*/
+ /* out: number of matched fields at the cursor
+ or to the right if search mode was PAGE_CUR_GE,
+ otherwise undefined */
+ btr_pcur_t* cursor); /* in: memory buffer for persistent cursor */
+/******************************************************************
+Gets the low_match value for a pcur after a search. */
+UNIV_INLINE
+ulint
+btr_pcur_get_low_match(
+/*===================*/
+ /* out: number of matched fields at the cursor
+ or to the right if search mode was PAGE_CUR_LE,
+ otherwise undefined */
+ btr_pcur_t* cursor); /* in: memory buffer for persistent cursor */
+/******************************************************************
+If mode is PAGE_CUR_G or PAGE_CUR_GE, opens a persistent cursor on the first
+user record satisfying the search condition; in the case of PAGE_CUR_L or
+PAGE_CUR_LE, on the last user record. If no such user record exists, then
+in the first case sets the cursor after last in tree, and in the latter case
+before first in tree. The latching mode must be BTR_SEARCH_LEAF or
+BTR_MODIFY_LEAF. */
+
+void
+btr_pcur_open_on_user_rec(
+/*======================*/
+ dict_index_t* index, /* in: index */
+ dtuple_t* tuple, /* in: tuple on which search done */
+ ulint mode, /* in: PAGE_CUR_L, ... */
+ ulint latch_mode, /* in: BTR_SEARCH_LEAF or
+ BTR_MODIFY_LEAF */
+ btr_pcur_t* cursor, /* in: memory buffer for persistent
+ cursor */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************************
+Positions a cursor at a randomly chosen position within a B-tree. */
+UNIV_INLINE
+void
+btr_pcur_open_at_rnd_pos(
+/*=====================*/
+ dict_index_t* index, /* in: index */
+ ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
+ btr_pcur_t* cursor, /* in/out: B-tree pcur */
+ mtr_t* mtr); /* in: mtr */
+/******************************************************************
+Frees the possible old_rec_buf buffer of a persistent cursor and sets the
+latch mode of the persistent cursor to BTR_NO_LATCHES. */
+UNIV_INLINE
+void
+btr_pcur_close(
+/*===========*/
+ btr_pcur_t* cursor); /* in: persistent cursor */
+/******************************************************************
+The position of the cursor is stored by taking an initial segment of the
+record the cursor is positioned on, before, or after, and copying it to the
+cursor data structure. NOTE that the page where the cursor is positioned
+must not be empty! */
+
+void
+btr_pcur_store_position(
+/*====================*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr); /* in: mtr */
+/******************************************************************
+If the latch mode of the cursor is BTR_SEARCH_LEAF or BTR_MODIFY_LEAF,
+releases the page latch and bufferfix reserved by the cursor.
+NOTE! In the case of BTR_MODIFY_LEAF, there should not exist changes
+made by the current mini-transaction to the data protected by the
+cursor latch, as then the latch must not be released until mtr_commit. */
+
+void
+btr_pcur_release_leaf(
+/*==================*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************
+Gets the rel_pos field for a cursor whose position has been stored. */
+UNIV_INLINE
+ulint
+btr_pcur_get_rel_pos(
+/*=================*/
+ /* out: BTR_PCUR_ON, ... */
+ btr_pcur_t* cursor);/* in: persistent cursor */
+/******************************************************************
+Restores the stored position of a persistent cursor bufferfixing the page and
+obtaining the specified latches. If the cursor position was saved when the
+(1) cursor was positioned on a user record: this function restores the position
+to the last record LESS OR EQUAL to the stored record;
+(2) cursor was positioned on a page infimum record: restores the position to
+the last record LESS than the user record which was the successor of the page
+infimum;
+(3) cursor was positioned on the page supremum: restores to the first record
+GREATER than the user record which was the predecessor of the supremum. */
+
+ibool
+btr_pcur_restore_position(
+/*======================*/
+ /* out: TRUE if the cursor position
+ was stored when it was on a user record
+ and it can be restored on a user record
+ whose ordering fields are identical to
+ the ones of the original user record */
+ ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
+ btr_pcur_t* cursor, /* in: detached persistent cursor */
+ mtr_t* mtr); /* in: mtr */
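+/* A hypothetical sketch (not part of the original interface) of the
+store/detach/restore cycle described above. It assumes pcur is positioned
+inside the mini-transaction mtr, on a page which is not empty. */
+#if 0
+static void
+btr_pcur_detach_and_restore_example(
+/*================================*/
+	btr_pcur_t*	pcur,	/* in: positioned persistent cursor */
+	mtr_t*		mtr)	/* in: mtr holding the cursor latches */
+{
+	ibool	success;
+
+	/* Remember the position and release the latches: the cursor
+	becomes detached */
+
+	btr_pcur_store_position(pcur, mtr);
+	btr_pcur_commit_specify_mtr(pcur, mtr);
+
+	/* ... other work, during which the tree may change ... */
+
+	/* Latch the index again and reposition the cursor */
+
+	mtr_start(mtr);
+	success = btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, mtr);
+
+	if (!success) {
+		/* a record with identical ordering fields no longer
+		exists: the cursor is now on the closest position, as
+		explained above */
+	}
+
+	mtr_commit(mtr);
+}
+#endif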
+/*************************************************************
+Sets the mtr field for a pcur. */
+UNIV_INLINE
+void
+btr_pcur_set_mtr(
+/*=============*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr); /* in, own: mtr */
+/*************************************************************
+Gets the mtr field for a pcur. */
+UNIV_INLINE
+mtr_t*
+btr_pcur_get_mtr(
+/*=============*/
+ /* out: mtr */
+ btr_pcur_t* cursor); /* in: persistent cursor */
+/******************************************************************
+Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES,
+that is, the cursor becomes detached. If there have been modifications
+to the page where pcur is positioned, this can be used instead of
+btr_pcur_release_leaf. Function btr_pcur_store_position should be used
+before calling this, if restoration of the cursor is wanted later. */
+UNIV_INLINE
+void
+btr_pcur_commit(
+/*============*/
+ btr_pcur_t* pcur); /* in: persistent cursor */
+/******************************************************************
+Differs from btr_pcur_commit in that we can specify the mtr to commit. */
+UNIV_INLINE
+void
+btr_pcur_commit_specify_mtr(
+/*========================*/
+ btr_pcur_t* pcur, /* in: persistent cursor */
+ mtr_t* mtr); /* in: mtr to commit */
+/******************************************************************
+Tests if a cursor is detached: that is, the latch mode is BTR_NO_LATCHES. */
+UNIV_INLINE
+ibool
+btr_pcur_is_detached(
+/*=================*/
+ /* out: TRUE if detached */
+ btr_pcur_t* pcur); /* in: persistent cursor */
+/*************************************************************
+Moves the persistent cursor to the next record in the tree. If no records are
+left, the cursor stays 'after last in tree'. */
+UNIV_INLINE
+ibool
+btr_pcur_move_to_next(
+/*==================*/
+ /* out: TRUE if the cursor was not after last
+ in tree */
+ btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the
+ function may release the page latch */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************
+Moves the persistent cursor to the previous record in the tree. If no records
+are left, the cursor stays 'before first in tree'. */
+
+ibool
+btr_pcur_move_to_prev(
+/*==================*/
+ /* out: TRUE if the cursor was not before first
+ in tree */
+ btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the
+ function may release the page latch */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************
+Moves the persistent cursor to the next user record in the tree. If no user
+records are left, the cursor ends up 'after last in tree'. */
+UNIV_INLINE
+ibool
+btr_pcur_move_to_next_user_rec(
+/*===========================*/
+ /* out: TRUE if the cursor moved forward,
+ ending on a user record */
+ btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the
+ function may release the page latch */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************
+Moves the persistent cursor to the first record on the next page.
+Releases the latch on the current page, and bufferunfixes it.
+Note that there must not be modifications on the current page,
+as then the x-latch can be released only in mtr_commit. */
+
+void
+btr_pcur_move_to_next_page(
+/*=======================*/
+ btr_pcur_t* cursor, /* in: persistent cursor; must be on the
+ last record of the current page */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************
+Moves the persistent cursor backward if it is on the first record
+of the page. Releases the latch on the current page, and bufferunfixes
+it. Note that to prevent a possible deadlock, the operation first
+stores the position of the cursor, releases the leaf latch, acquires
+necessary latches and restores the cursor position again before returning.
+The alphabetical position of the cursor is guaranteed to be sensible
+on return, but it may happen that the cursor is not positioned on the
+last record of any page, because the structure of the tree may have
+changed while the cursor had no latches. */
+
+void
+btr_pcur_move_backward_from_page(
+/*=============================*/
+ btr_pcur_t* cursor, /* in: persistent cursor, must be on the
+ first record of the current page */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************
+Returns the btr cursor component of a persistent cursor. */
+UNIV_INLINE
+btr_cur_t*
+btr_pcur_get_btr_cur(
+/*=================*/
+ /* out: pointer to btr cursor component */
+ btr_pcur_t* cursor); /* in: persistent cursor */
+/*************************************************************
+Returns the page cursor component of a persistent cursor. */
+UNIV_INLINE
+page_cur_t*
+btr_pcur_get_page_cur(
+/*==================*/
+ /* out: pointer to page cursor component */
+ btr_pcur_t* cursor); /* in: persistent cursor */
+/*************************************************************
+Returns the page of a persistent cursor. */
+UNIV_INLINE
+page_t*
+btr_pcur_get_page(
+/*==============*/
+ /* out: pointer to the page */
+ btr_pcur_t* cursor);/* in: persistent cursor */
+/*************************************************************
+Returns the record of a persistent cursor. */
+UNIV_INLINE
+rec_t*
+btr_pcur_get_rec(
+/*=============*/
+ /* out: pointer to the record */
+ btr_pcur_t* cursor);/* in: persistent cursor */
+/*************************************************************
+Checks if the persistent cursor is on a user record. */
+UNIV_INLINE
+ibool
+btr_pcur_is_on_user_rec(
+/*====================*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************
+Checks if the persistent cursor is after the last user record on
+a page. */
+UNIV_INLINE
+ibool
+btr_pcur_is_after_last_on_page(
+/*===========================*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************
+Checks if the persistent cursor is before the first user record on
+a page. */
+UNIV_INLINE
+ibool
+btr_pcur_is_before_first_on_page(
+/*=============================*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************
+Checks if the persistent cursor is before the first user record in
+the index tree. */
+UNIV_INLINE
+ibool
+btr_pcur_is_before_first_in_tree(
+/*=============================*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************
+Checks if the persistent cursor is after the last user record in
+the index tree. */
+UNIV_INLINE
+ibool
+btr_pcur_is_after_last_in_tree(
+/*===========================*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************
+Moves the persistent cursor to the next record on the same page. */
+UNIV_INLINE
+void
+btr_pcur_move_to_next_on_page(
+/*==========================*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************
+Moves the persistent cursor to the previous record on the same page. */
+UNIV_INLINE
+void
+btr_pcur_move_to_prev_on_page(
+/*==========================*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr); /* in: mtr */
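+/* A hypothetical sketch (not part of the original interface) of a simple
+ascending scan over an index using the movement functions above. It assumes,
+as in other users of this interface, that a cursor opened with
+btr_pcur_open_at_index_side(TRUE, ...) initially rests before the first
+user record, and that the scan is short enough to run inside a single
+mini-transaction. */
+#if 0
+static void
+btr_pcur_scan_example(
+/*==================*/
+	dict_index_t*	index)	/* in: index to scan from the low end */
+{
+	btr_pcur_t	pcur;
+	mtr_t		mtr;
+	rec_t*		rec;
+
+	mtr_start(&mtr);
+
+	btr_pcur_open_at_index_side(TRUE, index, BTR_SEARCH_LEAF,
+						&pcur, TRUE, &mtr);
+
+	while (btr_pcur_move_to_next_user_rec(&pcur, &mtr)) {
+
+		rec = btr_pcur_get_rec(&pcur);
+		/* ... process rec ... */
+	}
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+}
+#endif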
+
+
+/* The persistent B-tree cursor structure. This is used mainly for SQL
+selects, updates, and deletes. */
+
+struct btr_pcur_struct{
+ btr_cur_t btr_cur; /* a B-tree cursor */
+ ulint latch_mode; /* see FIXME note below!
+ BTR_SEARCH_LEAF, BTR_MODIFY_LEAF,
+ BTR_MODIFY_TREE, or BTR_NO_LATCHES,
+ depending on the latching state of
+ the page and tree where the cursor is
+ positioned; the last value means that
+ the cursor is not currently positioned:
+ we say then that the cursor is
+ detached; it can be restored to
+ attached if the old position was
+ stored in old_rec */
+ ulint old_stored; /* BTR_PCUR_OLD_STORED
+ or BTR_PCUR_OLD_NOT_STORED */
+ rec_t* old_rec; /* if cursor position is stored,
+ contains an initial segment of the
+ latest record cursor was positioned
+ either on, before, or after */
+ ulint rel_pos; /* BTR_PCUR_ON, BTR_PCUR_BEFORE, or
+ BTR_PCUR_AFTER, depending on whether
+ cursor was on, before, or after the
+ old_rec record */
+ dulint modify_clock; /* the modify clock value of the
+ buffer block when the cursor position
+ was stored */
+ ulint pos_state; /* see FIXME note below!
+ BTR_PCUR_IS_POSITIONED,
+ BTR_PCUR_WAS_POSITIONED,
+ BTR_PCUR_NOT_POSITIONED */
+ ulint search_mode; /* PAGE_CUR_G, ... */
+ /*-----------------------------*/
+ /* NOTE that the following fields may possess dynamically allocated
+ memory, which should be freed if not needed anymore! */
+
+ mtr_t* mtr; /* NULL, or this field may contain
+ a mini-transaction which holds the
+ latch on the cursor page */
+ byte* old_rec_buf; /* NULL, or a dynamically allocated
+ buffer for old_rec */
+ ulint buf_size; /* old_rec_buf size if old_rec_buf
+ is not NULL */
+};
+
+#define BTR_PCUR_IS_POSITIONED 1997660512 /* FIXME: currently, the state
+ can be BTR_PCUR_IS_POSITIONED,
+ though it really should be
+ BTR_PCUR_WAS_POSITIONED,
+ because we have no obligation
+ to commit the cursor with
+ mtr; similarly latch_mode may
+ be out of date */
+#define BTR_PCUR_WAS_POSITIONED 1187549791
+#define BTR_PCUR_NOT_POSITIONED 1328997689
+
+#define BTR_PCUR_OLD_STORED 908467085
+#define BTR_PCUR_OLD_NOT_STORED 122766467
+
+#ifndef UNIV_NONINL
+#include "btr0pcur.ic"
+#endif
+
+#endif
diff --git a/innobase/include/btr0pcur.ic b/innobase/include/btr0pcur.ic
new file mode 100644
index 00000000000..7f31f8fe502
--- /dev/null
+++ b/innobase/include/btr0pcur.ic
@@ -0,0 +1,598 @@
+/******************************************************
+The index tree persistent cursor
+
+(c) 1996 Innobase Oy
+
+Created 2/23/1996 Heikki Tuuri
+*******************************************************/
+
+
+/*************************************************************
+Gets the rel_pos field for a cursor whose position has been stored. */
+UNIV_INLINE
+ulint
+btr_pcur_get_rel_pos(
+/*=================*/
+ /* out: BTR_PCUR_ON, ... */
+ btr_pcur_t* cursor) /* in: persistent cursor */
+{
+ ut_ad(cursor);
+ ut_ad(cursor->old_rec);
+ ut_ad(cursor->old_stored == BTR_PCUR_OLD_STORED);
+ ut_ad((cursor->pos_state == BTR_PCUR_WAS_POSITIONED)
+ || (cursor->pos_state == BTR_PCUR_IS_POSITIONED));
+
+ return(cursor->rel_pos);
+}
+
+/*************************************************************
+Sets the mtr field for a pcur. */
+UNIV_INLINE
+void
+btr_pcur_set_mtr(
+/*=============*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr) /* in, own: mtr */
+{
+ ut_ad(cursor);
+
+ cursor->mtr = mtr;
+}
+
+/*************************************************************
+Gets the mtr field for a pcur. */
+UNIV_INLINE
+mtr_t*
+btr_pcur_get_mtr(
+/*=============*/
+ /* out: mtr */
+ btr_pcur_t* cursor) /* in: persistent cursor */
+{
+ ut_ad(cursor);
+
+ return(cursor->mtr);
+}
+
+/*************************************************************
+Returns the btr cursor component of a persistent cursor. */
+UNIV_INLINE
+btr_cur_t*
+btr_pcur_get_btr_cur(
+/*=================*/
+ /* out: pointer to btr cursor component */
+ btr_pcur_t* cursor) /* in: persistent cursor */
+{
+ return(&(cursor->btr_cur));
+}
+
+/*************************************************************
+Returns the page cursor component of a persistent cursor. */
+UNIV_INLINE
+page_cur_t*
+btr_pcur_get_page_cur(
+/*==================*/
+ /* out: pointer to page cursor component */
+ btr_pcur_t* cursor) /* in: persistent cursor */
+{
+ return(btr_cur_get_page_cur(&(cursor->btr_cur)));
+}
+
+/*************************************************************
+Returns the page of a persistent cursor. */
+UNIV_INLINE
+page_t*
+btr_pcur_get_page(
+/*==============*/
+ /* out: pointer to the page */
+ btr_pcur_t* cursor) /* in: persistent cursor */
+{
+ ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+
+ return(page_cur_get_page(btr_pcur_get_page_cur(cursor)));
+}
+
+/*************************************************************
+Returns the record of a persistent cursor. */
+UNIV_INLINE
+rec_t*
+btr_pcur_get_rec(
+/*=============*/
+ /* out: pointer to the record */
+ btr_pcur_t* cursor) /* in: persistent cursor */
+{
+ ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+ ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+ return(page_cur_get_rec(btr_pcur_get_page_cur(cursor)));
+}
+
+/******************************************************************
+Gets the up_match value for a pcur after a search. */
+UNIV_INLINE
+ulint
+btr_pcur_get_up_match(
+/*==================*/
+ /* out: number of matched fields at the cursor
+ or to the right if search mode was PAGE_CUR_GE,
+ otherwise undefined */
+ btr_pcur_t* cursor) /* in: memory buffer for persistent cursor */
+{
+ btr_cur_t* btr_cursor;
+
+ ut_ad((cursor->pos_state == BTR_PCUR_WAS_POSITIONED)
+ || (cursor->pos_state == BTR_PCUR_IS_POSITIONED));
+
+ btr_cursor = btr_pcur_get_btr_cur(cursor);
+
+ ut_ad(btr_cursor->up_match != ULINT_UNDEFINED);
+
+ return(btr_cursor->up_match);
+}
+
+/******************************************************************
+Gets the low_match value for a pcur after a search. */
+UNIV_INLINE
+ulint
+btr_pcur_get_low_match(
+/*===================*/
+ /* out: number of matched fields at the cursor
+ or to the right if search mode was PAGE_CUR_LE,
+ otherwise undefined */
+ btr_pcur_t* cursor) /* in: memory buffer for persistent cursor */
+{
+ btr_cur_t* btr_cursor;
+
+ ut_ad((cursor->pos_state == BTR_PCUR_WAS_POSITIONED)
+ || (cursor->pos_state == BTR_PCUR_IS_POSITIONED));
+
+ btr_cursor = btr_pcur_get_btr_cur(cursor);
+ ut_ad(btr_cursor->low_match != ULINT_UNDEFINED);
+
+ return(btr_cursor->low_match);
+}
+
+/*************************************************************
+Checks if the persistent cursor is after the last user record on
+a page. */
+UNIV_INLINE
+ibool
+btr_pcur_is_after_last_on_page(
+/*===========================*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr) /* in: mtr */
+{
+ UT_NOT_USED(mtr);
+ ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+ ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+ return(page_cur_is_after_last(btr_pcur_get_page_cur(cursor)));
+}
+
+/*************************************************************
+Checks if the persistent cursor is before the first user record on
+a page. */
+UNIV_INLINE
+ibool
+btr_pcur_is_before_first_on_page(
+/*=============================*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr) /* in: mtr */
+{
+ UT_NOT_USED(mtr);
+ ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+ ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+ return(page_cur_is_before_first(btr_pcur_get_page_cur(cursor)));
+}
+
+/*************************************************************
+Checks if the persistent cursor is on a user record. */
+UNIV_INLINE
+ibool
+btr_pcur_is_on_user_rec(
+/*====================*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr) /* in: mtr */
+{
+ ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+ ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+ if ((btr_pcur_is_before_first_on_page(cursor, mtr))
+ || (btr_pcur_is_after_last_on_page(cursor, mtr))) {
+
+ return(FALSE);
+ }
+
+ return(TRUE);
+}
+
+/*************************************************************
+Checks if the persistent cursor is before the first user record in
+the index tree. */
+UNIV_INLINE
+ibool
+btr_pcur_is_before_first_in_tree(
+/*=============================*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr) /* in: mtr */
+{
+ ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+ ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+ if (btr_page_get_prev(btr_pcur_get_page(cursor), mtr) != FIL_NULL) {
+
+ return(FALSE);
+ }
+
+ return(page_cur_is_before_first(btr_pcur_get_page_cur(cursor)));
+}
+
+/*************************************************************
+Checks if the persistent cursor is after the last user record in
+the index tree. */
+UNIV_INLINE
+ibool
+btr_pcur_is_after_last_in_tree(
+/*===========================*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr) /* in: mtr */
+{
+ ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+ ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+ if (btr_page_get_next(btr_pcur_get_page(cursor), mtr) != FIL_NULL) {
+
+ return(FALSE);
+ }
+
+ return(page_cur_is_after_last(btr_pcur_get_page_cur(cursor)));
+}
+
+/*************************************************************
+Moves the persistent cursor to the next record on the same page. */
+UNIV_INLINE
+void
+btr_pcur_move_to_next_on_page(
+/*==========================*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr) /* in: mtr */
+{
+ UT_NOT_USED(mtr);
+ ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+ ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+ page_cur_move_to_next(btr_pcur_get_page_cur(cursor));
+
+ cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+}
+
+/*************************************************************
+Moves the persistent cursor to the previous record on the same page. */
+UNIV_INLINE
+void
+btr_pcur_move_to_prev_on_page(
+/*==========================*/
+ btr_pcur_t* cursor, /* in: persistent cursor */
+ mtr_t* mtr) /* in: mtr */
+{
+ UT_NOT_USED(mtr);
+ ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+ ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+ page_cur_move_to_prev(btr_pcur_get_page_cur(cursor));
+
+ cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+}
+
+/*************************************************************
+Moves the persistent cursor to the next user record in the tree. If no user
+records are left, the cursor ends up 'after last in tree'. */
+UNIV_INLINE
+ibool
+btr_pcur_move_to_next_user_rec(
+/*===========================*/
+ /* out: TRUE if the cursor moved forward,
+ ending on a user record */
+ btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the
+ function may release the page latch */
+ mtr_t* mtr) /* in: mtr */
+{
+ ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+ ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+ cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+loop:
+ if (btr_pcur_is_after_last_on_page(cursor, mtr)) {
+
+ if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {
+
+ return(FALSE);
+ }
+
+ btr_pcur_move_to_next_page(cursor, mtr);
+ } else {
+ btr_pcur_move_to_next_on_page(cursor, mtr);
+ }
+
+ if (btr_pcur_is_on_user_rec(cursor, mtr)) {
+
+ return(TRUE);
+ }
+
+ goto loop;
+}
+
+/*************************************************************
+Moves the persistent cursor to the next record in the tree. If no records are
+left, the cursor stays 'after last in tree'. */
+UNIV_INLINE
+ibool
+btr_pcur_move_to_next(
+/*==================*/
+ /* out: TRUE if the cursor was not after last
+ in tree */
+ btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the
+ function may release the page latch */
+ mtr_t* mtr) /* in: mtr */
+{
+ ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+ ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+ cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+
+ if (btr_pcur_is_after_last_on_page(cursor, mtr)) {
+
+ if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {
+
+ return(FALSE);
+ }
+
+ btr_pcur_move_to_next_page(cursor, mtr);
+
+ return(TRUE);
+ }
+
+ btr_pcur_move_to_next_on_page(cursor, mtr);
+
+ return(TRUE);
+}
+
+/******************************************************************
+Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES,
+that is, the cursor becomes detached. If there have been modifications
+to the page where pcur is positioned, this can be used instead of
+btr_pcur_release_leaf. Function btr_pcur_store_position should be used
+before calling this, if restoration of the cursor is wanted later. */
+UNIV_INLINE
+void
+btr_pcur_commit(
+/*============*/
+ btr_pcur_t* pcur) /* in: persistent cursor */
+{
+ ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
+
+ pcur->latch_mode = BTR_NO_LATCHES;
+
+ mtr_commit(pcur->mtr);
+
+ pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
+}
+
+/******************************************************************
+Differs from btr_pcur_commit in that we can specify the mtr to commit. */
+UNIV_INLINE
+void
+btr_pcur_commit_specify_mtr(
+/*========================*/
+ btr_pcur_t* pcur, /* in: persistent cursor */
+ mtr_t* mtr) /* in: mtr to commit */
+{
+ ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
+
+ pcur->latch_mode = BTR_NO_LATCHES;
+
+ mtr_commit(mtr);
+
+ pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
+}
+
+/******************************************************************
+Sets the pcur latch mode to BTR_NO_LATCHES. */
+UNIV_INLINE
+void
+btr_pcur_detach(
+/*============*/
+ btr_pcur_t* pcur) /* in: persistent cursor */
+{
+ ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
+
+ pcur->latch_mode = BTR_NO_LATCHES;
+
+ pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
+}
+
+/******************************************************************
+Tests if a cursor is detached: that is, the latch mode is BTR_NO_LATCHES. */
+UNIV_INLINE
+ibool
+btr_pcur_is_detached(
+/*=================*/
+ /* out: TRUE if detached */
+ btr_pcur_t* pcur) /* in: persistent cursor */
+{
+ if (pcur->latch_mode == BTR_NO_LATCHES) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/******************************************************************
+Sets the old_rec_buf field to NULL. */
+UNIV_INLINE
+void
+btr_pcur_init(
+/*==========*/
+ btr_pcur_t* pcur) /* in: persistent cursor */
+{
+ pcur->old_stored = BTR_PCUR_OLD_NOT_STORED;
+ pcur->old_rec_buf = NULL;
+ pcur->old_rec = NULL;
+}
+
+/******************************************************************
+Initializes and opens a persistent cursor to an index tree. It should be
+closed with btr_pcur_close. */
+UNIV_INLINE
+void
+btr_pcur_open(
+/*==========*/
+ dict_index_t* index, /* in: index */
+ dtuple_t* tuple, /* in: tuple on which search done */
+ ulint mode, /* in: PAGE_CUR_L, ...;
+ NOTE that if the search is made using a unique
+ prefix of a record, mode should be
+ PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
+ may end up on the previous page from the
+ record! */
+ ulint latch_mode,/* in: BTR_SEARCH_LEAF, ... */
+ btr_pcur_t* cursor, /* in: memory buffer for persistent cursor */
+ mtr_t* mtr) /* in: mtr */
+{
+ btr_cur_t* btr_cursor;
+
+ /* Initialize the cursor */
+
+ btr_pcur_init(cursor);
+
+ cursor->latch_mode = latch_mode;
+ cursor->search_mode = mode;
+
+ /* Search with the tree cursor */
+
+ btr_cursor = btr_pcur_get_btr_cur(cursor);
+
+ btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
+ btr_cursor, 0, mtr);
+ cursor->pos_state = BTR_PCUR_IS_POSITIONED;
+}
+
+/******************************************************************
+Opens a persistent cursor to an index tree without initializing the
+cursor. */
+UNIV_INLINE
+void
+btr_pcur_open_with_no_init(
+/*=======================*/
+ dict_index_t* index, /* in: index */
+ dtuple_t* tuple, /* in: tuple on which search done */
+ ulint mode, /* in: PAGE_CUR_L, ...;
+ NOTE that if the search is made using a unique
+ prefix of a record, mode should be
+ PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
+ may end up on the previous page of the
+ record! */
+ ulint latch_mode,/* in: BTR_SEARCH_LEAF, ... */
+ btr_pcur_t* cursor, /* in: memory buffer for persistent cursor */
+ ulint has_search_latch,/* in: latch mode the caller
+ currently has on btr_search_latch:
+ RW_S_LATCH, or 0 */
+ mtr_t* mtr) /* in: mtr */
+{
+ btr_cur_t* btr_cursor;
+
+ cursor->latch_mode = latch_mode;
+ cursor->search_mode = mode;
+
+ /* Search with the tree cursor */
+
+ btr_cursor = btr_pcur_get_btr_cur(cursor);
+
+ btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
+ btr_cursor, has_search_latch, mtr);
+ cursor->pos_state = BTR_PCUR_IS_POSITIONED;
+
+ cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+}
+
+/*********************************************************************
+Opens a persistent cursor at either end of an index. */
+UNIV_INLINE
+void
+btr_pcur_open_at_index_side(
+/*========================*/
+ ibool from_left, /* in: TRUE if open to the low end,
+ FALSE if to the high end */
+ dict_index_t* index, /* in: index */
+ ulint latch_mode, /* in: latch mode */
+ btr_pcur_t* pcur, /* in: cursor */
+ ibool do_init, /* in: TRUE if should be initialized */
+ mtr_t* mtr) /* in: mtr */
+{
+ pcur->latch_mode = latch_mode;
+
+ if (from_left) {
+ pcur->search_mode = PAGE_CUR_G;
+ } else {
+ pcur->search_mode = PAGE_CUR_L;
+ }
+
+ if (do_init) {
+ btr_pcur_init(pcur);
+ }
+
+ btr_cur_open_at_index_side(from_left, index, latch_mode,
+ btr_pcur_get_btr_cur(pcur), mtr);
+ pcur->pos_state = BTR_PCUR_IS_POSITIONED;
+
+ pcur->old_stored = BTR_PCUR_OLD_NOT_STORED;
+}
+
+/**************************************************************************
+Positions a cursor at a randomly chosen position within a B-tree. */
+UNIV_INLINE
+void
+btr_pcur_open_at_rnd_pos(
+/*=====================*/
+ dict_index_t* index, /* in: index */
+ ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
+ btr_pcur_t* cursor, /* in/out: B-tree pcur */
+ mtr_t* mtr) /* in: mtr */
+{
+ /* Initialize the cursor */
+
+ cursor->latch_mode = latch_mode;
+ cursor->search_mode = PAGE_CUR_G;
+
+ btr_pcur_init(cursor);
+
+ btr_cur_open_at_rnd_pos(index, latch_mode,
+ btr_pcur_get_btr_cur(cursor), mtr);
+ cursor->pos_state = BTR_PCUR_IS_POSITIONED;
+ cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+}
+
+/******************************************************************
+Frees the possible old_rec_buf buffer of a persistent cursor and sets the
+latch mode of the persistent cursor to BTR_NO_LATCHES.
+UNIV_INLINE
+void
+btr_pcur_close(
+/*===========*/
+ btr_pcur_t* cursor) /* in: persistent cursor */
+{
+ if (cursor->old_rec_buf != NULL) {
+
+ mem_free(cursor->old_rec_buf);
+
+ cursor->old_rec = NULL;
+ cursor->old_rec_buf = NULL;
+ }
+
+ cursor->btr_cur.page_cur.rec = NULL;
+ cursor->old_rec = NULL;
+ cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+
+ cursor->latch_mode = BTR_NO_LATCHES;
+ cursor->pos_state = BTR_PCUR_NOT_POSITIONED;
+}
diff --git a/innobase/include/btr0sea.h b/innobase/include/btr0sea.h
new file mode 100644
index 00000000000..c319e16d740
--- /dev/null
+++ b/innobase/include/btr0sea.h
@@ -0,0 +1,269 @@
+/************************************************************************
+The index tree adaptive search
+
+(c) 1996 Innobase Oy
+
+Created 2/17/1996 Heikki Tuuri
+*************************************************************************/
+
+#ifndef btr0sea_h
+#define btr0sea_h
+
+#include "univ.i"
+
+#include "rem0rec.h"
+#include "dict0dict.h"
+#include "btr0types.h"
+#include "mtr0mtr.h"
+#include "ha0ha.h"
+
+/*********************************************************************
+Creates and initializes the adaptive search system at a database start. */
+
+void
+btr_search_sys_create(
+/*==================*/
+ ulint hash_size); /* in: hash index hash table size */
+/************************************************************************
+Returns search info for an index. */
+UNIV_INLINE
+btr_search_t*
+btr_search_get_info(
+/*================*/
+ /* out: search info; search mutex reserved */
+ dict_index_t* index); /* in: index */
+/*********************************************************************
+Creates and initializes a search info struct. */
+
+btr_search_t*
+btr_search_info_create(
+/*===================*/
+ /* out, own: search info struct */
+ mem_heap_t* heap); /* in: heap where created */
+/*************************************************************************
+Updates the search info. */
+UNIV_INLINE
+void
+btr_search_info_update(
+/*===================*/
+ dict_index_t* index, /* in: index of the cursor */
+ btr_cur_t* cursor);/* in: cursor which was just positioned */
+/**********************************************************************
+Tries to guess the right search position based on the search pattern info
+of the index. */
+
+ibool
+btr_search_guess_on_pattern(
+/*========================*/
+ /* out: TRUE if succeeded */
+ dict_index_t* index, /* in: index */
+ btr_search_t* info, /* in: index search info */
+ dtuple_t* tuple, /* in: logical record */
+ ulint mode, /* in: PAGE_CUR_L, ... */
+ ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
+ btr_cur_t* cursor, /* out: tree cursor */
+ mtr_t* mtr); /* in: mtr */
+/**********************************************************************
+Tries to guess the right search position based on the hash search info
+of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts,
+and the function returns TRUE, then cursor->up_match and cursor->low_match
+both have sensible values. */
+
+ibool
+btr_search_guess_on_hash(
+/*=====================*/
+ /* out: TRUE if succeeded */
+ dict_index_t* index, /* in: index */
+ btr_search_t* info, /* in: index search info */
+ dtuple_t* tuple, /* in: logical record */
+ ulint mode, /* in: PAGE_CUR_L, ... */
+ ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
+ btr_cur_t* cursor, /* out: tree cursor */
+ ulint has_search_latch,/* in: latch mode the caller
+ currently has on btr_search_latch:
+ RW_S_LATCH, RW_X_LATCH, or 0 */
+ mtr_t* mtr); /* in: mtr */
+/************************************************************************
+Moves or deletes hash entries for moved records. If new_page is already hashed,
+then the hash index for page, if any, is dropped. If new_page is not hashed,
+and page is hashed, then a new hash index is built to new_page with the same
+parameters as page (this often happens when a page is split). */
+
+void
+btr_search_move_or_delete_hash_entries(
+/*===================================*/
+ page_t* new_page, /* in: records are copied to this page */
+ page_t* page); /* in: index page */
+/************************************************************************
+Drops a page hash index. */
+
+void
+btr_search_drop_page_hash_index(
+/*============================*/
+ page_t* page); /* in: index page, s- or x-latched */
+/************************************************************************
+Drops a page hash index when a page is freed from a fseg to the file system.
+Drops possible hash index if the page happens to be in the buffer pool. */
+
+void
+btr_search_drop_page_hash_when_freed(
+/*=================================*/
+ ulint space, /* in: space id */
+ ulint page_no); /* in: page number */
+/************************************************************************
+Updates the page hash index when a single record is inserted on a page. */
+
+void
+btr_search_update_hash_node_on_insert(
+/*==================================*/
+ btr_cur_t* cursor);/* in: cursor which was positioned to the
+ place to insert using btr_cur_search_...,
+ and the new record has been inserted next
+ to the cursor */
+/************************************************************************
+Updates the page hash index when a single record is inserted on a page. */
+
+void
+btr_search_update_hash_on_insert(
+/*=============================*/
+ btr_cur_t* cursor);/* in: cursor which was positioned to the
+ place to insert using btr_cur_search_...,
+ and the new record has been inserted next
+ to the cursor */
+/************************************************************************
+Updates the page hash index when a single record is deleted from a page. */
+
+void
+btr_search_update_hash_on_delete(
+/*=============================*/
+ btr_cur_t* cursor);/* in: cursor which was positioned on the
+ record to delete using btr_cur_search_...,
+ the record is not yet deleted */
+/************************************************************************
+Prints info of the search system. */
+
+void
+btr_search_print_info(void);
+/*=======================*/
+/************************************************************************
+Prints info of searches on an index. */
+
+void
+btr_search_index_print_info(
+/*========================*/
+ dict_index_t* index); /* in: index */
+/************************************************************************
+Prints info of searches on a table. */
+
+void
+btr_search_table_print_info(
+/*========================*/
+ char* name); /* in: table name */
+/************************************************************************
+Validates the search system. */
+
+ibool
+btr_search_validate(void);
+/*=====================*/
+
+
+/* Search info directions */
+#define BTR_SEA_NO_DIRECTION 1
+#define BTR_SEA_LEFT 2
+#define BTR_SEA_RIGHT 3
+#define BTR_SEA_SAME_REC 4
+
+/* The search info struct in an index */
+
+struct btr_search_struct{
+ /* The following 4 fields are currently not used: */
+ rec_t* last_search; /* pointer to the lower limit record of the
+ previous search; NULL if not known */
+ ulint n_direction; /* number of consecutive searches in the
+ same direction */
+ ulint direction; /* BTR_SEA_NO_DIRECTION, BTR_SEA_LEFT,
+ BTR_SEA_RIGHT, BTR_SEA_SAME_REC,
+ or BTR_SEA_SAME_PAGE */
+ dulint modify_clock; /* value of modify clock at the time
+ last_search was stored */
+ /*----------------------*/
+ /* The following 4 fields are not protected by any latch: */
+ page_t* root_guess; /* the root page frame when it was last time
+ fetched, or NULL */
+ ulint hash_analysis; /* when this exceeds a certain value, the
+ hash analysis starts; this is reset if no
+ success noticed */
+ ibool last_hash_succ; /* TRUE if the last search would have
+ succeeded, or did succeed, using the hash
+ index; NOTE that the value here is not exact:
+ it is not calculated for every search, and the
+ calculation itself is not always accurate! */
+ ulint n_hash_potential;/* number of consecutive searches which would
+ have succeeded, or did succeed, using the hash
+ index */
+ /*----------------------*/
+ ulint n_fields; /* recommended prefix length for hash search:
+ number of full fields */
+ ulint n_bytes; /* recommended prefix: number of bytes in
+ an incomplete field */
+ ulint side; /* BTR_SEARCH_LEFT_SIDE or
+ BTR_SEARCH_RIGHT_SIDE, depending on whether
+ the leftmost record of several records with
+ the same prefix should be indexed in the
+ hash index */
+ /*----------------------*/
+ ulint n_hash_succ; /* number of successful hash searches thus
+ far */
+ ulint n_hash_fail; /* number of failed hash searches */
+ ulint n_patt_succ; /* number of successful pattern searches thus
+ far */
+ ulint n_searches; /* number of searches */
+};
+
+/* The hash index system */
+
+typedef struct btr_search_sys_struct btr_search_sys_t;
+
+struct btr_search_sys_struct{
+ hash_table_t* hash_index;
+};
+
+extern btr_search_sys_t* btr_search_sys;
+
+/* The latch protecting the adaptive search system: this latch protects
+(1) the positions of records on those pages where a hash index has been built.
+NOTE: it does not protect values of non-ordering fields within a record from
+being updated in-place! We can use fact (1) to perform unique searches on
+indexes. */
+
+extern rw_lock_t* btr_search_latch_temp;
+
+#define btr_search_latch (*btr_search_latch_temp)
+
+extern ulint btr_search_n_succ;
+extern ulint btr_search_n_hash_fail;
+
+/* After change in n_fields or n_bytes in info, this many rounds are waited
+before starting the hash analysis again: this is to save CPU time when there
+is no hope in building a hash index. */
+
+#define BTR_SEARCH_HASH_ANALYSIS 17
+
+#define BTR_SEARCH_LEFT_SIDE 1
+#define BTR_SEARCH_RIGHT_SIDE 2
+
+/* Limit of consecutive searches for trying a search shortcut on the search
+pattern */
+
+#define BTR_SEARCH_ON_PATTERN_LIMIT 3
+
+/* Limit of consecutive searches for trying a search shortcut using the hash
+index */
+
+#define BTR_SEARCH_ON_HASH_LIMIT 3
+
+#ifndef UNIV_NONINL
+#include "btr0sea.ic"
+#endif
+
+#endif
diff --git a/innobase/include/btr0sea.ic b/innobase/include/btr0sea.ic
new file mode 100644
index 00000000000..63a3a658cf4
--- /dev/null
+++ b/innobase/include/btr0sea.ic
@@ -0,0 +1,65 @@
+/************************************************************************
+The index tree adaptive search
+
+(c) 1996 Innobase Oy
+
+Created 2/17/1996 Heikki Tuuri
+*************************************************************************/
+
+#include "dict0mem.h"
+#include "btr0cur.h"
+#include "buf0buf.h"
+
+/*************************************************************************
+Updates the search info. */
+
+void
+btr_search_info_update_slow(
+/*========================*/
+ btr_search_t* info, /* in: search info */
+ btr_cur_t* cursor);/* in: cursor which was just positioned */
+
+/************************************************************************
+Returns search info for an index. */
+UNIV_INLINE
+btr_search_t*
+btr_search_get_info(
+/*================*/
+ /* out: search info; search mutex reserved */
+ dict_index_t* index) /* in: index */
+{
+ ut_ad(index);
+
+ return(index->search_info);
+}
+
+/*************************************************************************
+Updates the search info. */
+UNIV_INLINE
+void
+btr_search_info_update(
+/*===================*/
+ dict_index_t* index, /* in: index of the cursor */
+ btr_cur_t* cursor) /* in: cursor which was just positioned */
+{
+ btr_search_t* info;
+
+ ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED)
+ && !rw_lock_own(&btr_search_latch, RW_LOCK_EX));
+
+ info = btr_search_get_info(index);
+
+ info->hash_analysis++;
+
+ if (info->hash_analysis < BTR_SEARCH_HASH_ANALYSIS) {
+
+ /* Do nothing */
+
+ return;
+
+ }
+
+ ut_ad(cursor->flag != BTR_CUR_HASH);
+
+ btr_search_info_update_slow(info, cursor);
+}
diff --git a/innobase/include/btr0types.h b/innobase/include/btr0types.h
new file mode 100644
index 00000000000..03a61480e2e
--- /dev/null
+++ b/innobase/include/btr0types.h
@@ -0,0 +1,21 @@
+/************************************************************************
+The index tree general types
+
+(c) 1996 Innobase Oy
+
+Created 2/17/1996 Heikki Tuuri
+*************************************************************************/
+
+#ifndef btr0types_h
+#define btr0types_h
+
+#include "univ.i"
+
+#include "rem0types.h"
+#include "page0types.h"
+
+typedef struct btr_pcur_struct btr_pcur_t;
+typedef struct btr_cur_struct btr_cur_t;
+typedef struct btr_search_struct btr_search_t;
+
+#endif
diff --git a/innobase/include/buf0buf.h b/innobase/include/buf0buf.h
new file mode 100644
index 00000000000..08c59d60c91
--- /dev/null
+++ b/innobase/include/buf0buf.h
@@ -0,0 +1,834 @@
+/* Innobase relational database engine; Copyright (C) 2001 Innobase Oy
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License 2
+ as published by the Free Software Foundation in June 1991.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License 2
+ along with this program (in file COPYING); if not, write to the Free
+ Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+/******************************************************
+The database buffer pool high-level routines
+
+(c) 1995 Innobase Oy
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef buf0buf_h
+#define buf0buf_h
+
+#include "univ.i"
+#include "fil0fil.h"
+#include "mtr0types.h"
+#include "buf0types.h"
+#include "sync0rw.h"
+#include "hash0hash.h"
+#include "ut0byte.h"
+
+/* Flags for flush types */
+#define BUF_FLUSH_LRU 1
+#define BUF_FLUSH_SINGLE_PAGE 2
+#define BUF_FLUSH_LIST 3 /* An array in the pool struct
+ has size BUF_FLUSH_LIST + 1: if you
+ add more flush types, put them in
+ the middle! */
+/* Modes for buf_page_get_gen */
+#define BUF_GET 10 /* get always */
+#define BUF_GET_IF_IN_POOL 11 /* get if in pool */
+#define BUF_GET_NOWAIT 12 /* get if can set the latch without
+ waiting */
+#define BUF_GET_NO_LATCH 14 /* get and bufferfix, but set no latch;
+ we have separated this case, because
+ it is error-prone programming not to
+ set a latch, and it should be used
+ with care */
+/* Modes for buf_page_get_known_nowait */
+#define BUF_MAKE_YOUNG 51
+#define BUF_KEEP_OLD 52
+
+extern buf_pool_t* buf_pool; /* The buffer pool of the database */
+extern ibool buf_debug_prints;/* If this is set TRUE, the program
+ prints info whenever read or flush
+ occurs */
+
+/************************************************************************
+Initializes the buffer pool of the database. */
+
+void
+buf_pool_init(
+/*==========*/
+ ulint max_size, /* in: maximum size of the pool in blocks */
+ ulint curr_size); /* in: current size to use, must be <=
+ max_size */
+/*************************************************************************
+Gets the current size of buffer pool in bytes. */
+UNIV_INLINE
+ulint
+buf_pool_get_curr_size(void);
+/*========================*/
+ /* out: size in bytes */
+/*************************************************************************
+Gets the maximum size of buffer pool in bytes. */
+UNIV_INLINE
+ulint
+buf_pool_get_max_size(void);
+/*=======================*/
+ /* out: size in bytes */
+/************************************************************************
+Gets the smallest oldest_modification lsn for any page in the pool. Returns
+ut_dulint_zero if all modified pages have been flushed to disk. */
+UNIV_INLINE
+dulint
+buf_pool_get_oldest_modification(void);
+/*==================================*/
+ /* out: oldest modification in pool,
+ ut_dulint_zero if none */
+/*************************************************************************
+Allocates a buffer frame. */
+
+buf_frame_t*
+buf_frame_alloc(void);
+/*==================*/
+ /* out: buffer frame */
+/*************************************************************************
+Frees a buffer frame which does not contain a file page. */
+
+void
+buf_frame_free(
+/*===========*/
+ buf_frame_t* frame); /* in: buffer frame */
+/*************************************************************************
+Copies contents of a buffer frame to a given buffer. */
+UNIV_INLINE
+byte*
+buf_frame_copy(
+/*===========*/
+ /* out: buf */
+ byte* buf, /* in: buffer to copy to */
+ buf_frame_t* frame); /* in: buffer frame */
+/******************************************************************
+NOTE! The following macros should be used instead of buf_page_get_gen,
+to improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed
+as LA! */
+#ifdef UNIV_SYNC_DEBUG
+#define buf_page_get(SP, OF, LA, MTR) buf_page_get_gen(\
+ SP, OF, LA, NULL,\
+ BUF_GET, __FILE__, __LINE__, MTR)
+#else
+#define buf_page_get(SP, OF, LA, MTR) buf_page_get_gen(\
+ SP, OF, LA, NULL,\
+ BUF_GET, MTR)
+#endif
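+/* A hypothetical sketch (not part of the original interface) of fetching a
+page with the buf_page_get macro above inside a mini-transaction. The space
+id and page number are assumed to identify an existing file page. */
+#if 0
+static void
+buf_page_get_example(
+/*=================*/
+	ulint	space,	/* in: space id of an existing page */
+	ulint	page_no)/* in: page number of an existing page */
+{
+	buf_frame_t*	frame;
+	mtr_t		mtr;
+
+	mtr_start(&mtr);
+
+	/* s-latch and bufferfix the page; both are released when the
+	mini-transaction commits */
+
+	frame = buf_page_get(space, page_no, RW_S_LATCH, &mtr);
+
+	/* ... read the page contents through frame ... */
+
+	mtr_commit(&mtr);
+}
+#endif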
+/******************************************************************
+Use these macros to bufferfix a page with no latching. Remember not to
+read the contents of the page unless you know it is safe. Do not modify
+the contents of the page! We have separated this case, because it is
+error-prone programming not to set a latch, and it should be used
+with care. */
+#ifdef UNIV_SYNC_DEBUG
+#define buf_page_get_with_no_latch(SP, OF, MTR) buf_page_get_gen(\
+ SP, OF, RW_NO_LATCH, NULL,\
+ BUF_GET_NO_LATCH, __FILE__, __LINE__, MTR)
+#else
+#define buf_page_get_with_no_latch(SP, OF, MTR) buf_page_get_gen(\
+ SP, OF, RW_NO_LATCH, NULL,\
+ BUF_GET_NO_LATCH, MTR)
+#endif
+/******************************************************************
+NOTE! The following macros should be used instead of buf_page_get_gen, to
+improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed as LA! */
+#ifdef UNIV_SYNC_DEBUG
+#define buf_page_get_nowait(SP, OF, LA, MTR) buf_page_get_gen(\
+ SP, OF, LA, NULL,\
+ BUF_GET_NOWAIT, __FILE__, __LINE__, MTR)
+#else
+#define buf_page_get_nowait(SP, OF, LA, MTR) buf_page_get_gen(\
+ SP, OF, LA, NULL,\
+ BUF_GET_NOWAIT, MTR)
+#endif
+/******************************************************************
+NOTE! The following macros should be used instead of
+buf_page_optimistic_get_func, to improve debugging. Only values RW_S_LATCH and
+RW_X_LATCH are allowed as LA! */
+#ifdef UNIV_SYNC_DEBUG
+#define buf_page_optimistic_get(LA, G, MC, MTR) buf_page_optimistic_get_func(\
+ LA, G, MC, __FILE__, __LINE__, MTR)
+#else
+#define buf_page_optimistic_get(LA, G, MC, MTR) buf_page_optimistic_get_func(\
+ LA, G, MC, MTR)
+#endif
+/************************************************************************
+This is the general function used to get optimistic access to a database
+page. */
+
+ibool
+buf_page_optimistic_get_func(
+/*=========================*/
+ /* out: TRUE if success */
+ ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
+ buf_frame_t* guess, /* in: guessed frame */
+ dulint modify_clock,/* in: modify clock value if mode is
+ ..._GUESS_ON_CLOCK */
+#ifdef UNIV_SYNC_DEBUG
+ char* file, /* in: file name */
+ ulint line, /* in: line where called */
+#endif
+ mtr_t* mtr); /* in: mini-transaction */
+/************************************************************************
+Tries to get the page, but if file io is required, releases all latches
+in mtr down to the given savepoint. If io is required, this function
+retrieves the page to buffer buf_pool, but does not bufferfix it or latch
+it. */
+UNIV_INLINE
+buf_frame_t*
+buf_page_get_release_on_io(
+/*=======================*/
+ /* out: pointer to the frame, or NULL
+ if not in buffer buf_pool */
+ ulint space, /* in: space id */
+ ulint offset, /* in: offset of the page within space
+ in units of a page */
+ buf_frame_t* guess, /* in: guessed frame or NULL */
+ ulint rw_latch, /* in: RW_X_LATCH, RW_S_LATCH,
+ or RW_NO_LATCH */
+ ulint savepoint, /* in: mtr savepoint */
+ mtr_t* mtr); /* in: mtr */
+/************************************************************************
+This is used to get access to a known database page, when no waiting can be
+done. */
+
+ibool
+buf_page_get_known_nowait(
+/*======================*/
+ /* out: TRUE if success */
+ ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
+ buf_frame_t* guess, /* in: the known page frame */
+ ulint mode, /* in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
+#ifdef UNIV_SYNC_DEBUG
+ char* file, /* in: file name */
+ ulint line, /* in: line where called */
+#endif
+ mtr_t* mtr); /* in: mini-transaction */
+/************************************************************************
+This is the general function used to get access to a database page. */
+
+buf_frame_t*
+buf_page_get_gen(
+/*=============*/
+ /* out: pointer to the frame or NULL */
+ ulint space, /* in: space id */
+ ulint offset, /* in: page number */
+ ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
+ buf_frame_t* guess, /* in: guessed frame or NULL */
+ ulint mode, /* in: BUF_GET, BUF_GET_IF_IN_POOL,
+ BUF_GET_NO_LATCH */
+#ifdef UNIV_SYNC_DEBUG
+ char* file, /* in: file name */
+ ulint line, /* in: line where called */
+#endif
+ mtr_t* mtr); /* in: mini-transaction */
+/************************************************************************
+Initializes a page to the buffer buf_pool. The page is usually not read
+from a file even if it cannot be found in the buffer buf_pool. This is one
+of the functions which perform a state transition NOT_USED =>
+FILE_PAGE on a block (the other is buf_page_init_for_read above). */
+
+buf_frame_t*
+buf_page_create(
+/*============*/
+ /* out: pointer to the frame, page bufferfixed */
+ ulint space, /* in: space id */
+ ulint offset, /* in: offset of the page within space in units of
+ a page */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Decrements the bufferfix count of a buffer control block and releases
+a latch, if specified. */
+UNIV_INLINE
+void
+buf_page_release(
+/*=============*/
+ buf_block_t* block, /* in: buffer block */
+ ulint rw_latch, /* in: RW_S_LATCH, RW_X_LATCH,
+ RW_NO_LATCH */
+ mtr_t* mtr); /* in: mtr */
+/************************************************************************
+Moves a page to the start of the buffer pool LRU list. This high-level
+function can be used to prevent an important page from slipping out of
+the buffer pool. */
+
+void
+buf_page_make_young(
+/*=================*/
+ buf_frame_t* frame); /* in: buffer frame of a file page */
+/************************************************************************
+Returns TRUE if the page can be found in the buffer pool hash table. NOTE
+that it is possible that the page is not yet read from disk, though. */
+
+ibool
+buf_page_peek(
+/*==========*/
+ /* out: TRUE if found from page hash table,
+ NOTE that the page is not necessarily yet read
+ from disk! */
+ ulint space, /* in: space id */
+ ulint offset);/* in: page number */
+/************************************************************************
+Returns the buffer control block if the page can be found in the buffer
+pool. NOTE that it is possible that the page is not yet read
+from disk, though. This is a very low-level function: use with care! */
+
+buf_block_t*
+buf_page_peek_block(
+/*================*/
+ /* out: control block if found from page hash table,
+ otherwise NULL; NOTE that the page is not necessarily
+ yet read from disk! */
+ ulint space, /* in: space id */
+ ulint offset);/* in: page number */
+/************************************************************************
+Recommends a move of a block to the start of the LRU list if there is danger
+of dropping from the buffer pool. NOTE: does not reserve the buffer pool
+mutex. */
+UNIV_INLINE
+ibool
+buf_block_peek_if_too_old(
+/*======================*/
+ /* out: TRUE if should be made younger */
+ buf_block_t* block); /* in: block to make younger */
+/************************************************************************
+Returns the current state of is_hashed of a page. FALSE if the page is
+not in the pool. NOTE that this operation does not fix the page in the
+pool if it is found there. */
+
+ibool
+buf_page_peek_if_search_hashed(
+/*===========================*/
+ /* out: TRUE if page hash index is built in search
+ system */
+ ulint space, /* in: space id */
+ ulint offset);/* in: page number */
+/************************************************************************
+Gets the youngest modification log sequence number for a frame.
+Returns zero if not file page or no modification occurred yet. */
+UNIV_INLINE
+dulint
+buf_frame_get_newest_modification(
+/*==============================*/
+ /* out: newest modification to page */
+ buf_frame_t* frame); /* in: pointer to a frame */
+/************************************************************************
+Increments the modify clock of a frame by 1. The caller must (1) own the
+pool mutex and the block bufferfix count must be zero, or (2) own an x-lock
+on the block. */
+UNIV_INLINE
+dulint
+buf_frame_modify_clock_inc(
+/*=======================*/
+ /* out: new value */
+ buf_frame_t* frame); /* in: pointer to a frame */
+/************************************************************************
+Returns the value of the modify clock. The caller must have an s-lock
+or x-lock on the block. */
+UNIV_INLINE
+dulint
+buf_frame_get_modify_clock(
+/*=======================*/
+ /* out: value */
+ buf_frame_t* frame); /* in: pointer to a frame */
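+/* A hypothetical sketch (not part of the original interface) of the
+optimistic access pattern supported by the modify clock: the caller saves
+the frame pointer and the modify clock value while still holding a latch,
+releases its latches, and later tries to regain access without a page hash
+lookup. If the block has been evicted or modified in between, the guess
+fails and a normal buf_page_get is needed. */
+#if 0
+static ibool
+buf_optimistic_reget_example(
+/*=========================*/
+	buf_frame_t*	guess,		/* in: frame latched earlier */
+	dulint		modify_clock,	/* in: value returned by
+					buf_frame_get_modify_clock while
+					the latch was still held */
+	mtr_t*		mtr)		/* in: mini-transaction */
+{
+	/* succeeds only if the frame still contains the same page and
+	its modify clock has not changed since the value was saved */
+
+	return(buf_page_optimistic_get(RW_S_LATCH, guess, modify_clock,
+									mtr));
+}
+#endif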
+/**************************************************************************
+Gets the page number of a pointer pointing within a buffer frame containing
+a file page. */
+UNIV_INLINE
+ulint
+buf_frame_get_page_no(
+/*==================*/
+ /* out: page number */
+ byte* ptr); /* in: pointer to within a buffer frame */
+/**************************************************************************
+Gets the space id of a pointer pointing within a buffer frame containing a
+file page. */
+UNIV_INLINE
+ulint
+buf_frame_get_space_id(
+/*===================*/
+ /* out: space id */
+ byte* ptr); /* in: pointer to within a buffer frame */
+/**************************************************************************
+Gets the space id, page offset, and byte offset within page of a
+pointer pointing to a buffer frame containing a file page. */
+UNIV_INLINE
+void
+buf_ptr_get_fsp_addr(
+/*=================*/
+ byte* ptr, /* in: pointer to a buffer frame */
+ ulint* space, /* out: space id */
+ fil_addr_t* addr); /* out: page offset and byte offset */
+/**************************************************************************
+Gets the hash value of the page the pointer is pointing to. This can be used
+in searches in the lock hash table. */
+UNIV_INLINE
+ulint
+buf_frame_get_lock_hash_val(
+/*========================*/
+ /* out: lock hash value */
+ byte* ptr); /* in: pointer to within a buffer frame */
+/**************************************************************************
+Gets the mutex protecting the page record lock hash chain in the lock
+table. */
+UNIV_INLINE
+mutex_t*
+buf_frame_get_lock_mutex(
+/*=====================*/
+ /* out: mutex */
+ byte* ptr); /* in: pointer to within a buffer frame */
+/***********************************************************************
+Gets the frame the pointer is pointing to. */
+UNIV_INLINE
+buf_frame_t*
+buf_frame_align(
+/*============*/
+			/* out: pointer to frame */
+ byte* ptr); /* in: pointer to a frame */
+/***********************************************************************
+Checks if a pointer points to the block array of the buffer pool (blocks, not
+the frames). */
+UNIV_INLINE
+ibool
+buf_pool_is_block(
+/*==============*/
+ /* out: TRUE if pointer to block */
+ void* ptr); /* in: pointer to memory */
+/*************************************************************************
+Validates the buffer pool data structure. */
+
+ibool
+buf_validate(void);
+/*==============*/
+/*************************************************************************
+Prints info of the buffer pool data structure. */
+
+void
+buf_print(void);
+/*===========*/
+/*************************************************************************
+Prints info of the buffer i/o. */
+
+void
+buf_print_io(void);
+/*==============*/
+/*************************************************************************
+Checks that all file pages in the buffer are in a replaceable state. */
+
+ibool
+buf_all_freed(void);
+/*===============*/
+/*************************************************************************
+Checks that there currently are no pending i/o-operations for the buffer
+pool. */
+
+ibool
+buf_pool_check_no_pending_io(void);
+/*==============================*/
+ /* out: TRUE if there is no pending i/o */
+/*************************************************************************
+Invalidates the file pages in the buffer pool when an archive recovery is
+completed. All the file pages buffered must be in a replaceable state when
+this function is called: not latched and not modified. */
+
+void
+buf_pool_invalidate(void);
+/*=====================*/
+
+/*========================================================================
+--------------------------- LOWER LEVEL ROUTINES -------------------------
+=========================================================================*/
+
+/*************************************************************************
+Adds latch level info for the rw-lock protecting the buffer frame. This
+should be called in the debug version after a successful latching of a
+page if we know the latching order level of the acquired latch. If
+UNIV_SYNC_DEBUG is not defined, compiles to an empty function. */
+UNIV_INLINE
+void
+buf_page_dbg_add_level(
+/*===================*/
+ buf_frame_t* frame, /* in: buffer page where we have acquired
+ a latch */
+ ulint level); /* in: latching order level */
+/*************************************************************************
+Gets a pointer to the memory frame of a block. */
+UNIV_INLINE
+buf_frame_t*
+buf_block_get_frame(
+/*================*/
+ /* out: pointer to the frame */
+ buf_block_t* block); /* in: pointer to the control block */
+/*************************************************************************
+Gets the space id of a block. */
+UNIV_INLINE
+ulint
+buf_block_get_space(
+/*================*/
+ /* out: space id */
+ buf_block_t* block); /* in: pointer to the control block */
+/*************************************************************************
+Gets the page number of a block. */
+UNIV_INLINE
+ulint
+buf_block_get_page_no(
+/*==================*/
+ /* out: page number */
+ buf_block_t* block); /* in: pointer to the control block */
+/***********************************************************************
+Gets the block whose frame the pointer is pointing to. */
+UNIV_INLINE
+buf_block_t*
+buf_block_align(
+/*============*/
+ /* out: pointer to block */
+ byte* ptr); /* in: pointer to a frame */
+/************************************************************************
+This function is used to check whether an i/o operation is in
+progress on a buffer page. */
+UNIV_INLINE
+ibool
+buf_page_io_query(
+/*==============*/
+ /* out: TRUE if io going on */
+ buf_block_t* block); /* in: pool block, must be bufferfixed */
+/***********************************************************************
+Accessor function for block array. */
+UNIV_INLINE
+buf_block_t*
+buf_pool_get_nth_block(
+/*===================*/
+ /* out: pointer to block */
+ buf_pool_t* pool, /* in: pool */
+ ulint i); /* in: index of the block */
+/************************************************************************
+Function which inits a page for a read into the buf_pool. If the page is
+already in the buf_pool, does nothing. Sets the io_fix flag to BUF_IO_READ and
+sets a non-recursive exclusive lock on the buffer frame. The io-handler must
+take care that the flag is cleared and the lock released later. This is one
+of the functions which perform the state transition NOT_USED => FILE_PAGE on
+a block (the other is buf_page_create). */
+
+buf_block_t*
+buf_page_init_for_read(
+/*===================*/
+ /* out: pointer to the block */
+ ulint mode, /* in: BUF_READ_IBUF_PAGES_ONLY, ... */
+ ulint space, /* in: space id */
+ ulint offset);/* in: page number */
+/************************************************************************
+Completes an asynchronous read or write request of a file page to or from
+the buffer pool. */
+
+void
+buf_page_io_complete(
+/*=================*/
+ buf_block_t* block); /* in: pointer to the block in question */
+/************************************************************************
+Calculates a folded value of a file page address to use in the page hash
+table. */
+UNIV_INLINE
+ulint
+buf_page_address_fold(
+/*==================*/
+ /* out: the folded value */
+ ulint space, /* in: space id */
+ ulint offset);/* in: offset of the page within space */
+/**********************************************************************
+Returns the control block of a file page, NULL if not found. */
+UNIV_INLINE
+buf_block_t*
+buf_page_hash_get(
+/*==============*/
+ /* out: block, NULL if not found */
+ ulint space, /* in: space id */
+ ulint offset);/* in: offset of the page within space */
+/***********************************************************************
+Increments the pool clock by one and returns its new value. Remember that
+in the 32 bit version the clock wraps around at 4 billion! */
+UNIV_INLINE
+ulint
+buf_pool_clock_tic(void);
+/*====================*/
+ /* out: new clock value */
+/*************************************************************************
+Gets the current length of the free list of buffer blocks. */
+
+ulint
+buf_get_free_list_len(void);
+/*=======================*/
+
+
+
+/* The buffer control block structure */
+
+struct buf_block_struct{
+
+ /* 1. General fields */
+
+ ulint state; /* state of the control block:
+ BUF_BLOCK_NOT_USED, ... */
+ byte* frame; /* pointer to buffer frame which
+ is of size UNIV_PAGE_SIZE, and
+ aligned to an address divisible by
+ UNIV_PAGE_SIZE */
+ ulint space; /* space id of the page */
+ ulint offset; /* page number within the space */
+ ulint lock_hash_val; /* hashed value of the page address
+ in the record lock hash table */
+ mutex_t* lock_mutex; /* mutex protecting the chain in the
+ record lock hash table */
+ rw_lock_t lock; /* read-write lock of the buffer
+ frame */
+ rw_lock_t read_lock; /* rw-lock reserved when a page read
+ to the frame is requested; a thread
+ can wait for this rw-lock if it wants
+ to wait for the read to complete;
+ the usual way is to wait for lock,
+ but if the thread just wants a
+ bufferfix and no latch on the page,
+ then it can wait for this rw-lock */
+ buf_block_t* hash; /* node used in chaining to the page
+ hash table */
+ /* 2. Page flushing fields */
+
+ UT_LIST_NODE_T(buf_block_t) flush_list;
+ /* node of the modified, not yet
+ flushed blocks list */
+ dulint newest_modification;
+ /* log sequence number of the youngest
+ modification to this block, zero if
+ not modified */
+ dulint oldest_modification;
+ /* log sequence number of the START of
+ the log entry written of the oldest
+ modification to this block which has
+ not yet been flushed on disk; zero if
+ all modifications are on disk */
+ ulint flush_type; /* if this block is currently being
+ flushed to disk, this tells the
+ flush_type: BUF_FLUSH_LRU or
+ BUF_FLUSH_LIST */
+
+ /* 3. LRU replacement algorithm fields */
+
+ UT_LIST_NODE_T(buf_block_t) free;
+ /* node of the free block list */
+ UT_LIST_NODE_T(buf_block_t) LRU;
+ /* node of the LRU list */
+ ulint LRU_position; /* value which monotonically
+ decreases (or may stay constant if
+ the block is in the old blocks) toward
+ the end of the LRU list, if the pool
+ ulint_clock has not wrapped around:
+ NOTE that this value can only be used
+ in heuristic algorithms, because of
+ the possibility of a wrap-around! */
+	ulint		freed_page_clock;/* the value of the buf_pool's
+					freed_page_clock when this block was
+					last put to the head of the
+					LRU list */
+ ibool old; /* TRUE if the block is in the old
+ blocks in the LRU list */
+ ibool accessed; /* TRUE if the page has been accessed
+ while in the buffer pool: read-ahead
+ may read in pages which have not been
+ accessed yet */
+ ulint buf_fix_count; /* count of how manyfold this block
+ is currently bufferfixed */
+ ulint io_fix; /* if a read is pending to the frame,
+ io_fix is BUF_IO_READ, in the case
+ of a write BUF_IO_WRITE, otherwise 0 */
+ /* 4. Optimistic search field */
+
+ dulint modify_clock; /* this clock is incremented every
+ time a pointer to a record on the
+ page may become obsolete; this is
+ used in the optimistic cursor
+ positioning: if the modify clock has
+ not changed, we know that the pointer
+ is still valid; this field may be
+ changed if the thread (1) owns the
+ pool mutex and the page is not
+ bufferfixed, or (2) the thread has an
+ x-latch on the block */
+
+ /* 5. Hash search fields: NOTE that these fields are protected by
+ btr_search_mutex */
+
+ ulint n_hash_helps; /* counter which controls building
+ of a new hash index for the page */
+ ulint n_fields; /* recommended prefix length for hash
+ search: number of full fields */
+ ulint n_bytes; /* recommended prefix: number of bytes
+ in an incomplete field */
+ ulint side; /* BTR_SEARCH_LEFT_SIDE or
+ BTR_SEARCH_RIGHT_SIDE, depending on
+ whether the leftmost record of several
+ records with the same prefix should be
+ indexed in the hash index */
+ ibool is_hashed; /* TRUE if hash index has already been
+ built on this page; note that it does
+ not guarantee that the index is
+ complete, though: there may have been
+ hash collisions, record deletions,
+ etc. */
+ ulint curr_n_fields; /* prefix length for hash indexing:
+ number of full fields */
+ ulint curr_n_bytes; /* number of bytes in hash indexing */
+ ulint curr_side; /* BTR_SEARCH_LEFT_SIDE or
+ BTR_SEARCH_RIGHT_SIDE in hash
+ indexing */
+ /* 6. Debug fields */
+
+ rw_lock_t debug_latch; /* in the debug version, each thread
+ which bufferfixes the block acquires
+ an s-latch here; so we can use the
+ debug utilities in sync0rw */
+};
+
+/* The buffer pool structure. NOTE! The definition appears here only for
+other modules of this directory (buf) to see it. Do not use from outside! */
+
+struct buf_pool_struct{
+
+ /* 1. General fields */
+
+ mutex_t mutex; /* mutex protecting the buffer pool
+ struct and control blocks, except the
+ read-write lock in them */
+ byte* frame_mem; /* pointer to the memory area which
+ was allocated for the frames */
+ byte* frame_zero; /* pointer to the first buffer frame:
+ this may differ from frame_mem, because
+ this is aligned by the frame size */
+ buf_block_t* blocks; /* array of buffer control blocks */
+ ulint max_size; /* number of control blocks ==
+ maximum pool size in pages */
+ ulint curr_size; /* current pool size in pages */
+ hash_table_t* page_hash; /* hash table of the file pages */
+
+ ulint n_pend_reads; /* number of pending read operations */
+	ulint		n_pages_read;	/* number of read operations */
+	ulint		n_pages_written;/* number of write operations */
+ ulint n_pages_created;/* number of pages created in the pool
+ with no read */
+ /* 2. Page flushing algorithm fields */
+
+ UT_LIST_BASE_NODE_T(buf_block_t) flush_list;
+ /* base node of the modified block
+ list */
+ ibool init_flush[BUF_FLUSH_LIST + 1];
+ /* this is TRUE when a flush of the
+ given type is being initialized */
+ ulint n_flush[BUF_FLUSH_LIST + 1];
+ /* this is the number of pending
+ writes in the given flush type */
+ os_event_t no_flush[BUF_FLUSH_LIST + 1];
+ /* this is in the set state when there
+ is no flush batch of the given type
+ running */
+ ulint ulint_clock; /* a sequence number used to count
+ time. NOTE! This counter wraps
+ around at 4 billion (if ulint ==
+ 32 bits)! */
+ ulint freed_page_clock;/* a sequence number used to count the
+ number of buffer blocks removed from
+ the end of the LRU list; NOTE that
+ this counter may wrap around at 4
+ billion! */
+ ulint LRU_flush_ended;/* when an LRU flush ends for a page,
+ this is incremented by one; this is
+ set to zero when a buffer block is
+ allocated */
+
+ /* 3. LRU replacement algorithm fields */
+
+ UT_LIST_BASE_NODE_T(buf_block_t) free;
+ /* base node of the free block list */
+ UT_LIST_BASE_NODE_T(buf_block_t) LRU;
+ /* base node of the LRU list */
+ buf_block_t* LRU_old; /* pointer to the about 3/8 oldest
+ blocks in the LRU list; NULL if LRU
+ length less than BUF_LRU_OLD_MIN_LEN */
+ ulint LRU_old_len; /* length of the LRU list from
+ the block to which LRU_old points
+ onward, including that block;
+ see buf0lru.c for the restrictions
+ on this value; not defined if
+ LRU_old == NULL */
+};
+
+/* States of a control block */
+#define BUF_BLOCK_NOT_USED 211 /* is in the free list */
+#define BUF_BLOCK_READY_FOR_USE 212 /* when buf_get_free_block returns
+ a block, it is in this state */
+#define BUF_BLOCK_FILE_PAGE 213 /* contains a buffered file page */
+#define BUF_BLOCK_MEMORY 214 /* contains some main memory object */
+#define BUF_BLOCK_REMOVE_HASH 215 /* hash index should be removed
+ before putting to the free list */
+
+/* Io_fix states of a control block; these must be != 0 */
+#define BUF_IO_READ 561
+#define BUF_IO_WRITE 562
+
+/************************************************************************
+Let us list the consistency conditions for different control block states.
+
+NOT_USED: is in free list, not in LRU list, not in flush list, nor
+ page hash table
+READY_FOR_USE: is not in free list, LRU list, or flush list, nor page
+ hash table
+MEMORY: is not in free list, LRU list, or flush list, nor page
+ hash table
+FILE_PAGE: space and offset are defined, is in page hash table
+ if io_fix == BUF_IO_WRITE,
+ pool: no_flush[block->flush_type] is in reset state,
+ pool: n_flush[block->flush_type] > 0
+
+ (1) if buf_fix_count == 0, then
+ is in LRU list, not in free list
+ is in flush list,
+ if and only if oldest_modification > 0
+ is x-locked,
+ if and only if io_fix == BUF_IO_READ
+ is s-locked,
+ if and only if io_fix == BUF_IO_WRITE
+
+ (2) if buf_fix_count > 0, then
+ is not in LRU list, not in free list
+ is in flush list,
+ if and only if oldest_modification > 0
+ if io_fix == BUF_IO_READ,
+ is x-locked
+ if io_fix == BUF_IO_WRITE,
+ is s-locked
+
+State transitions:
+
+NOT_USED => READY_FOR_USE
+READY_FOR_USE => MEMORY
+READY_FOR_USE => FILE_PAGE
+MEMORY => NOT_USED
+FILE_PAGE => NOT_USED NOTE: This transition is allowed if and only if
+ (1) buf_fix_count == 0,
+ (2) oldest_modification == 0, and
+ (3) io_fix == 0.
+*/
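+
+/* Illustrative example (editorial sketch, not part of the original
+header): the FILE_PAGE => NOT_USED precondition above corresponds to the
+following check over the buf_block_struct fields defined earlier in this
+file; ut_dulint_is_zero is used because oldest_modification is a dulint
+rather than a plain ulint:
+
+	(block->buf_fix_count == 0)
+	&& ut_dulint_is_zero(block->oldest_modification)
+	&& (block->io_fix == 0)
+*/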
+
+#ifndef UNIV_NONINL
+#include "buf0buf.ic"
+#endif
+
+#endif
diff --git a/innobase/include/buf0buf.ic b/innobase/include/buf0buf.ic
new file mode 100644
index 00000000000..24ada36bca2
--- /dev/null
+++ b/innobase/include/buf0buf.ic
@@ -0,0 +1,641 @@
+/******************************************************
+The database buffer buf_pool
+
+(c) 1995 Innobase Oy
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#include "buf0flu.h"
+#include "buf0lru.h"
+#include "buf0rea.h"
+#include "mtr0mtr.h"
+
+extern ulint buf_dbg_counter; /* This is used to insert validation
+ operations in execution in the
+ debug version */
+
+/************************************************************************
+Recommends a move of a block to the start of the LRU list if there is danger
+of dropping from the buffer pool. NOTE: does not reserve the buffer pool
+mutex. */
+UNIV_INLINE
+ibool
+buf_block_peek_if_too_old(
+/*======================*/
+ /* out: TRUE if should be made younger */
+ buf_block_t* block) /* in: block to make younger */
+{
+ if (buf_pool->freed_page_clock >= block->freed_page_clock
+ + 1 + (buf_pool->curr_size / 1024)) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
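+
+/* Worked example (illustrative numbers only, not from the original
+source): with buf_pool->curr_size == 8192 pages the threshold above is
+1 + 8192 / 1024 == 9, i.e. the block is recommended to be made younger
+once at least 9 blocks have been evicted from the end of the LRU list
+since freed_page_clock was last stored in the block. */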
+
+/*************************************************************************
+Gets the current size of the buf_pool in bytes. */
+UNIV_INLINE
+ulint
+buf_pool_get_curr_size(void)
+/*========================*/
+ /* out: size in bytes */
+{
+ return((buf_pool->curr_size) * UNIV_PAGE_SIZE);
+}
+
+/*************************************************************************
+Gets the maximum size of the buf_pool in bytes. */
+UNIV_INLINE
+ulint
+buf_pool_get_max_size(void)
+/*=======================*/
+ /* out: size in bytes */
+{
+ return((buf_pool->max_size) * UNIV_PAGE_SIZE);
+}
+
+/***********************************************************************
+Accessor function for block array. */
+UNIV_INLINE
+buf_block_t*
+buf_pool_get_nth_block(
+/*===================*/
+ /* out: pointer to block */
+ buf_pool_t* buf_pool,/* in: buf_pool */
+ ulint i) /* in: index of the block */
+{
+ ut_ad(buf_pool);
+ ut_ad(i < buf_pool->max_size);
+
+ return(i + buf_pool->blocks);
+}
+
+/***********************************************************************
+Checks if a pointer points to the block array of the buffer pool (blocks, not
+the frames). */
+UNIV_INLINE
+ibool
+buf_pool_is_block(
+/*==============*/
+ /* out: TRUE if pointer to block */
+ void* ptr) /* in: pointer to memory */
+{
+ if ((buf_pool->blocks <= (buf_block_t*)ptr)
+ && ((buf_block_t*)ptr < buf_pool->blocks + buf_pool->max_size)) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/************************************************************************
+Gets the smallest oldest_modification lsn for any page in the pool. Returns
+ut_dulint_zero if all modified pages have been flushed to disk. */
+UNIV_INLINE
+dulint
+buf_pool_get_oldest_modification(void)
+/*==================================*/
+ /* out: oldest modification in pool,
+ ut_dulint_zero if none */
+{
+ buf_block_t* block;
+ dulint lsn;
+
+ mutex_enter(&(buf_pool->mutex));
+
+ block = UT_LIST_GET_LAST(buf_pool->flush_list);
+
+ if (block == NULL) {
+ lsn = ut_dulint_zero;
+ } else {
+ lsn = block->oldest_modification;
+ }
+
+ mutex_exit(&(buf_pool->mutex));
+
+ return(lsn);
+}
+
+/***********************************************************************
+Increments the buf_pool clock by one and returns its new value. Remember
+that in the 32 bit version the clock wraps around at 4 billion! */
+UNIV_INLINE
+ulint
+buf_pool_clock_tic(void)
+/*====================*/
+ /* out: new clock value */
+{
+ ut_ad(mutex_own(&(buf_pool->mutex)));
+
+ buf_pool->ulint_clock++;
+
+ return(buf_pool->ulint_clock);
+}
+
+/*************************************************************************
+Gets a pointer to the memory frame of a block. */
+UNIV_INLINE
+buf_frame_t*
+buf_block_get_frame(
+/*================*/
+ /* out: pointer to the frame */
+ buf_block_t* block) /* in: pointer to the control block */
+{
+ ut_ad(block);
+ ut_ad(block >= buf_pool->blocks);
+ ut_ad(block < buf_pool->blocks + buf_pool->max_size);
+ ut_ad(block->state != BUF_BLOCK_NOT_USED);
+ ut_ad((block->state != BUF_BLOCK_FILE_PAGE)
+ || (block->buf_fix_count > 0));
+
+ return(block->frame);
+}
+
+/*************************************************************************
+Gets the space id of a block. */
+UNIV_INLINE
+ulint
+buf_block_get_space(
+/*================*/
+ /* out: space id */
+ buf_block_t* block) /* in: pointer to the control block */
+{
+ ut_ad(block);
+ ut_ad(block >= buf_pool->blocks);
+ ut_ad(block < buf_pool->blocks + buf_pool->max_size);
+ ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
+ ut_ad(block->buf_fix_count > 0);
+
+ return(block->space);
+}
+
+/*************************************************************************
+Gets the page number of a block. */
+UNIV_INLINE
+ulint
+buf_block_get_page_no(
+/*==================*/
+ /* out: page number */
+ buf_block_t* block) /* in: pointer to the control block */
+{
+ ut_ad(block);
+ ut_ad(block >= buf_pool->blocks);
+ ut_ad(block < buf_pool->blocks + buf_pool->max_size);
+ ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
+ ut_ad(block->buf_fix_count > 0);
+
+ return(block->offset);
+}
+
+/***********************************************************************
+Gets the block whose frame the pointer is pointing to. */
+UNIV_INLINE
+buf_block_t*
+buf_block_align(
+/*============*/
+ /* out: pointer to block */
+ byte* ptr) /* in: pointer to a frame */
+{
+ buf_block_t* block;
+ buf_frame_t* frame_zero;
+
+ ut_ad(ptr);
+
+ frame_zero = buf_pool->frame_zero;
+
+ ut_ad((ulint)ptr >= (ulint)frame_zero);
+
+ block = buf_pool_get_nth_block(buf_pool, (ptr - frame_zero)
+ >> UNIV_PAGE_SIZE_SHIFT);
+ return(block);
+}
+
+/***********************************************************************
+Gets the block whose frame the pointer is pointing to. Does not
+require a file page to be bufferfixed. */
+UNIV_INLINE
+buf_block_t*
+buf_block_align_low(
+/*================*/
+ /* out: pointer to block */
+ byte* ptr) /* in: pointer to a frame */
+{
+ buf_block_t* block;
+ buf_frame_t* frame_zero;
+
+ ut_ad(ptr);
+
+ frame_zero = buf_pool->frame_zero;
+
+ ut_ad((ulint)ptr >= (ulint)frame_zero);
+
+ block = buf_pool_get_nth_block(buf_pool, (ptr - frame_zero)
+ >> UNIV_PAGE_SIZE_SHIFT);
+ return(block);
+}
+
+/***********************************************************************
+Gets the frame the pointer is pointing to. */
+UNIV_INLINE
+buf_frame_t*
+buf_frame_align(
+/*============*/
+			/* out: pointer to frame */
+ byte* ptr) /* in: pointer to a frame */
+{
+ buf_frame_t* frame;
+
+ ut_ad(ptr);
+
+ frame = ut_align_down(ptr, UNIV_PAGE_SIZE);
+
+ ut_ad((ulint)frame
+ >= (ulint)(buf_pool_get_nth_block(buf_pool, 0)->frame));
+ ut_ad((ulint)frame <= (ulint)(buf_pool_get_nth_block(buf_pool,
+ buf_pool->max_size - 1)->frame));
+ return(frame);
+}
+
+/**************************************************************************
+Gets the page number of a pointer pointing within a buffer frame containing
+a file page. */
+UNIV_INLINE
+ulint
+buf_frame_get_page_no(
+/*==================*/
+ /* out: page number */
+ byte* ptr) /* in: pointer to within a buffer frame */
+{
+ return(buf_block_get_page_no(buf_block_align(ptr)));
+}
+
+/**************************************************************************
+Gets the space id of a pointer pointing within a buffer frame containing a
+file page. */
+UNIV_INLINE
+ulint
+buf_frame_get_space_id(
+/*===================*/
+ /* out: space id */
+ byte* ptr) /* in: pointer to within a buffer frame */
+{
+ return(buf_block_get_space(buf_block_align(ptr)));
+}
+
+/**************************************************************************
+Gets the space id, page offset, and byte offset within page of a
+pointer pointing to a buffer frame containing a file page. */
+UNIV_INLINE
+void
+buf_ptr_get_fsp_addr(
+/*=================*/
+ byte* ptr, /* in: pointer to a buffer frame */
+ ulint* space, /* out: space id */
+ fil_addr_t* addr) /* out: page offset and byte offset */
+{
+ buf_block_t* block;
+
+ block = buf_block_align(ptr);
+
+ *space = buf_block_get_space(block);
+ addr->page = buf_block_get_page_no(block);
+ addr->boffset = ptr - buf_frame_align(ptr);
+}
+
+/**************************************************************************
+Gets the hash value of the page the pointer is pointing to. This can be used
+in searches in the lock hash table. */
+UNIV_INLINE
+ulint
+buf_frame_get_lock_hash_val(
+/*========================*/
+ /* out: lock hash value */
+ byte* ptr) /* in: pointer to within a buffer frame */
+{
+ buf_block_t* block;
+
+ block = buf_block_align(ptr);
+
+ return(block->lock_hash_val);
+}
+
+/**************************************************************************
+Gets the mutex protecting the page record lock hash chain in the lock
+table. */
+UNIV_INLINE
+mutex_t*
+buf_frame_get_lock_mutex(
+/*=====================*/
+ /* out: mutex */
+ byte* ptr) /* in: pointer to within a buffer frame */
+{
+ buf_block_t* block;
+
+ block = buf_block_align(ptr);
+
+ return(block->lock_mutex);
+}
+
+/*************************************************************************
+Copies contents of a buffer frame to a given buffer. */
+UNIV_INLINE
+byte*
+buf_frame_copy(
+/*===========*/
+ /* out: buf */
+ byte* buf, /* in: buffer to copy to */
+ buf_frame_t* frame) /* in: buffer frame */
+{
+ ut_ad(buf && frame);
+
+ ut_memcpy(buf, frame, UNIV_PAGE_SIZE);
+
+ return(buf);
+}
+
+/************************************************************************
+Calculates a folded value of a file page address to use in the page hash
+table. */
+UNIV_INLINE
+ulint
+buf_page_address_fold(
+/*==================*/
+ /* out: the folded value */
+ ulint space, /* in: space id */
+ ulint offset) /* in: offset of the page within space */
+{
+ return((space << 20) + space + offset);
+}
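+
+/* Worked example (illustrative numbers only, not from the original
+source): space == 1, offset == 5 folds to (1 << 20) + 1 + 5 == 1048582. */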
+
+/************************************************************************
+This function is used to check whether an i/o operation is in
+progress on a buffer page. */
+UNIV_INLINE
+ibool
+buf_page_io_query(
+/*==============*/
+ /* out: TRUE if io going on */
+ buf_block_t* block) /* in: buf_pool block, must be bufferfixed */
+{
+ mutex_enter(&(buf_pool->mutex));
+
+ ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
+ ut_ad(block->buf_fix_count > 0);
+
+ if (block->io_fix != 0) {
+ mutex_exit(&(buf_pool->mutex));
+
+ return(TRUE);
+ }
+
+ mutex_exit(&(buf_pool->mutex));
+
+ return(FALSE);
+}
+
+/************************************************************************
+Gets the youngest modification log sequence number for a frame. Returns zero
+if not a file page or no modification occurred yet. */
+UNIV_INLINE
+dulint
+buf_frame_get_newest_modification(
+/*==============================*/
+ /* out: newest modification to the page */
+ buf_frame_t* frame) /* in: pointer to a frame */
+{
+ buf_block_t* block;
+ dulint lsn;
+
+ ut_ad(frame);
+
+ block = buf_block_align(frame);
+
+ mutex_enter(&(buf_pool->mutex));
+
+ if (block->state == BUF_BLOCK_FILE_PAGE) {
+ lsn = block->newest_modification;
+ } else {
+ lsn = ut_dulint_zero;
+ }
+
+ mutex_exit(&(buf_pool->mutex));
+
+ return(lsn);
+}
+
+/************************************************************************
+Increments the modify clock of a frame by 1. The caller must either (1) own
+the buf_pool mutex while the block bufferfix count is zero, or (2) own an
+x-lock on the block. */
+UNIV_INLINE
+dulint
+buf_frame_modify_clock_inc(
+/*=======================*/
+ /* out: new value */
+ buf_frame_t* frame) /* in: pointer to a frame */
+{
+ buf_block_t* block;
+
+ ut_ad(frame);
+
+ block = buf_block_align_low(frame);
+
+ ut_ad((mutex_own(&(buf_pool->mutex)) && (block->buf_fix_count == 0))
+ || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
+
+ UT_DULINT_INC(block->modify_clock);
+
+ return(block->modify_clock);
+}
+
+/************************************************************************
+Returns the value of the modify clock. The caller must have an s-lock
+or x-lock on the block. */
+UNIV_INLINE
+dulint
+buf_frame_get_modify_clock(
+/*=======================*/
+ /* out: value */
+ buf_frame_t* frame) /* in: pointer to a frame */
+{
+ buf_block_t* block;
+
+ ut_ad(frame);
+
+ block = buf_block_align(frame);
+
+ ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
+ || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
+
+ return(block->modify_clock);
+}
+
+/***********************************************************************
+Increments the bufferfix count. */
+UNIV_INLINE
+void
+buf_block_buf_fix_inc_debug(
+/*========================*/
+ buf_block_t* block, /* in: block to bufferfix */
+ char* file, /* in: file name */
+ ulint line) /* in: line */
+{
+ ibool ret;
+
+ ret = rw_lock_s_lock_func_nowait(&(block->debug_latch)
+#ifdef UNIV_SYNC_DEBUG
+ ,file, line
+#endif
+ );
+
+ ut_ad(ret == TRUE);
+
+ block->buf_fix_count++;
+}
+
+/***********************************************************************
+Increments the bufferfix count. */
+UNIV_INLINE
+void
+buf_block_buf_fix_inc(
+/*==================*/
+ buf_block_t* block) /* in: block to bufferfix */
+{
+ block->buf_fix_count++;
+}
+
+/**********************************************************************
+Returns the control block of a file page, NULL if not found. */
+UNIV_INLINE
+buf_block_t*
+buf_page_hash_get(
+/*==============*/
+ /* out: block, NULL if not found */
+ ulint space, /* in: space id */
+ ulint offset) /* in: offset of the page within space */
+{
+ buf_block_t* block;
+ ulint fold;
+
+ ut_ad(buf_pool);
+ ut_ad(mutex_own(&(buf_pool->mutex)));
+
+ /* Look for the page in the hash table */
+
+ fold = buf_page_address_fold(space, offset);
+
+ HASH_SEARCH(hash, buf_pool->page_hash, fold, block,
+ (block->space == space) && (block->offset == offset));
+ return(block);
+}
+
+/************************************************************************
+Tries to get the page, but if file io is required, releases all latches
+in mtr down to the given savepoint. If io is required, this function
+retrieves the page into the buf_pool, but does not bufferfix it or latch
+it. */
+UNIV_INLINE
+buf_frame_t*
+buf_page_get_release_on_io(
+/*=======================*/
+ /* out: pointer to the frame, or NULL
+					if not in the buf_pool */
+ ulint space, /* in: space id */
+ ulint offset, /* in: offset of the page within space
+ in units of a page */
+ buf_frame_t* guess, /* in: guessed frame or NULL */
+ ulint rw_latch, /* in: RW_X_LATCH, RW_S_LATCH,
+ or RW_NO_LATCH */
+ ulint savepoint, /* in: mtr savepoint */
+ mtr_t* mtr) /* in: mtr */
+{
+ buf_frame_t* frame;
+
+ frame = buf_page_get_gen(space, offset, rw_latch, guess,
+ BUF_GET_IF_IN_POOL,
+#ifdef UNIV_SYNC_DEBUG
+ __FILE__, __LINE__,
+#endif
+ mtr);
+ if (frame != NULL) {
+
+ return(frame);
+ }
+
+	/* The page was not in the buf_pool: release the latches
+ down to the savepoint */
+
+ mtr_rollback_to_savepoint(mtr, savepoint);
+
+ buf_page_get(space, offset, RW_S_LATCH, mtr);
+
+ /* When we get here, the page is in buffer, but we release
+ the latches again down to the savepoint, before returning */
+
+ mtr_rollback_to_savepoint(mtr, savepoint);
+
+ return(NULL);
+}
+
+/************************************************************************
+Decrements the bufferfix count of a buffer control block and releases
+a latch, if specified. */
+UNIV_INLINE
+void
+buf_page_release(
+/*=============*/
+ buf_block_t* block, /* in: buffer block */
+ ulint rw_latch, /* in: RW_S_LATCH, RW_X_LATCH,
+ RW_NO_LATCH */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint buf_fix_count;
+
+ ut_ad(block);
+
+ mutex_enter_fast(&(buf_pool->mutex));
+
+ ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
+ ut_ad(block->buf_fix_count > 0);
+
+ if (rw_latch == RW_X_LATCH && mtr->modifications) {
+
+ buf_flush_note_modification(block, mtr);
+ }
+
+#ifdef UNIV_SYNC_DEBUG
+ rw_lock_s_unlock(&(block->debug_latch));
+#endif
+ buf_fix_count = block->buf_fix_count;
+ block->buf_fix_count = buf_fix_count - 1;
+
+ mutex_exit(&(buf_pool->mutex));
+
+ if (rw_latch == RW_S_LATCH) {
+ rw_lock_s_unlock(&(block->lock));
+ } else if (rw_latch == RW_X_LATCH) {
+ rw_lock_x_unlock(&(block->lock));
+ }
+}
+
+/*************************************************************************
+Adds latch level info for the rw-lock protecting the buffer frame. This
+should be called in the debug version after a successful latching of a
+page if we know the latching order level of the acquired latch. If
+UNIV_SYNC_DEBUG is not defined, compiles to an empty function. */
+UNIV_INLINE
+void
+buf_page_dbg_add_level(
+/*===================*/
+ buf_frame_t* frame, /* in: buffer page where we have acquired
+ a latch */
+ ulint level) /* in: latching order level */
+{
+#ifdef UNIV_SYNC_DEBUG
+ sync_thread_add_level(&(buf_block_align(frame)->lock), level);
+#endif
+}
diff --git a/innobase/include/buf0flu.h b/innobase/include/buf0flu.h
new file mode 100644
index 00000000000..9317950904f
--- /dev/null
+++ b/innobase/include/buf0flu.h
@@ -0,0 +1,110 @@
+/******************************************************
+The database buffer pool flush algorithm
+
+(c) 1995 Innobase Oy
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef buf0flu_h
+#define buf0flu_h
+
+#include "univ.i"
+#include "buf0types.h"
+#include "ut0byte.h"
+#include "mtr0types.h"
+
+/************************************************************************
+Updates the flush system data structures when a write is completed. */
+
+void
+buf_flush_write_complete(
+/*=====================*/
+ buf_block_t* block); /* in: pointer to the block in question */
+/*************************************************************************
+Flushes pages from the end of the LRU list if there is too small
+a margin of replaceable pages there. */
+
+void
+buf_flush_free_margin(void);
+/*=======================*/
+/***********************************************************************
+This utility flushes dirty blocks from the end of the LRU list or flush_list.
+NOTE 1: in the case of an LRU flush the calling thread may own latches to
+pages: to avoid deadlocks, this function must be written so that it cannot
+end up waiting for these latches! NOTE 2: in the case of a flush list flush,
+the calling thread is not allowed to own any latches on pages! */
+
+ulint
+buf_flush_batch(
+/*============*/
+ /* out: number of blocks for which the write
+ request was queued */
+ ulint flush_type, /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST; if
+ BUF_FLUSH_LIST, then the caller must not own
+ any latches on pages */
+	ulint	min_n,		/* in: wished minimum number of blocks flushed
+ (it is not guaranteed that the actual number
+ is that big, though) */
+	dulint	lsn_limit);	/* in: in the case BUF_FLUSH_LIST all blocks whose
+ oldest_modification is smaller than this
+ should be flushed (if their number does not
+ exceed min_n), otherwise ignored */
+/**********************************************************************
+Waits until a flush batch of the given type ends. */
+
+void
+buf_flush_wait_batch_end(
+/*=====================*/
+ ulint type); /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
+/************************************************************************
+This function should be called at a mini-transaction commit, if a page was
+modified in it. Puts the block to the list of modified blocks, if it not
+already in it. */
+UNIV_INLINE
+void
+buf_flush_note_modification(
+/*========================*/
+ buf_block_t* block, /* in: block which is modified */
+ mtr_t* mtr); /* in: mtr */
+/************************************************************************
+This function should be called when recovery has modified a buffer page. */
+UNIV_INLINE
+void
+buf_flush_recv_note_modification(
+/*=============================*/
+ buf_block_t* block, /* in: block which is modified */
+ dulint start_lsn, /* in: start lsn of the first mtr in a
+ set of mtr's */
+ dulint end_lsn); /* in: end lsn of the last mtr in the
+ set of mtr's */
+/************************************************************************
+Returns TRUE if the file page block is immediately suitable for replacement,
+i.e., transition FILE_PAGE => NOT_USED allowed. */
+
+ibool
+buf_flush_ready_for_replace(
+/*========================*/
+ /* out: TRUE if can replace immediately */
+ buf_block_t* block); /* in: buffer control block, must be in state
+ BUF_BLOCK_FILE_PAGE and in the LRU list */
+/**********************************************************************
+Validates the flush list. */
+
+ibool
+buf_flush_validate(void);
+/*====================*/
+ /* out: TRUE if ok */
+
+/* When buf_flush_free_margin is called, it tries to make this many blocks
+available to replacement in the free list and at the end of the LRU list (to
+make sure that a read-ahead batch can be read efficiently in a single
+sweep). */
+
+#define BUF_FLUSH_FREE_BLOCK_MARGIN (5 + BUF_READ_AHEAD_AREA)
+#define BUF_FLUSH_EXTRA_MARGIN (BUF_FLUSH_FREE_BLOCK_MARGIN / 4)
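+
+/* Worked example (illustrative numbers only, not from the original
+source): with buf_pool->curr_size >= 512 pages, BUF_READ_AHEAD_AREA is 32,
+so BUF_FLUSH_FREE_BLOCK_MARGIN is 5 + 32 == 37 and BUF_FLUSH_EXTRA_MARGIN
+is 37 / 4 == 9 (integer division). */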
+
+#ifndef UNIV_NONINL
+#include "buf0flu.ic"
+#endif
+
+#endif
diff --git a/innobase/include/buf0flu.ic b/innobase/include/buf0flu.ic
new file mode 100644
index 00000000000..e2faf773cab
--- /dev/null
+++ b/innobase/include/buf0flu.ic
@@ -0,0 +1,100 @@
+/******************************************************
+The database buffer pool flush algorithm
+
+(c) 1995 Innobase Oy
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#include "buf0buf.h"
+#include "mtr0mtr.h"
+
+/************************************************************************
+Inserts a modified block into the flush list. */
+
+void
+buf_flush_insert_into_flush_list(
+/*=============================*/
+ buf_block_t* block); /* in: block which is modified */
+/************************************************************************
+Inserts a modified block into the flush list in the right sorted position.
+This function is used by recovery, because there the modifications do not
+necessarily come in the order of lsn's. */
+
+void
+buf_flush_insert_sorted_into_flush_list(
+/*====================================*/
+ buf_block_t* block); /* in: block which is modified */
+
+/************************************************************************
+This function should be called at a mini-transaction commit, if a page was
+modified in it. Puts the block to the list of modified blocks, if it is not
+already in it. */
+UNIV_INLINE
+void
+buf_flush_note_modification(
+/*========================*/
+ buf_block_t* block, /* in: block which is modified */
+ mtr_t* mtr) /* in: mtr */
+{
+ ut_ad(block);
+ ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
+ ut_ad(block->buf_fix_count > 0);
+ ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
+ ut_ad(mutex_own(&(buf_pool->mutex)));
+
+ ut_ad(ut_dulint_cmp(mtr->start_lsn, ut_dulint_zero) != 0);
+ ut_ad(mtr->modifications);
+ ut_ad(ut_dulint_cmp(block->newest_modification, mtr->end_lsn) <= 0);
+
+ block->newest_modification = mtr->end_lsn;
+
+ if (ut_dulint_is_zero(block->oldest_modification)) {
+
+ block->oldest_modification = mtr->start_lsn;
+ ut_ad(!ut_dulint_is_zero(block->oldest_modification));
+
+ buf_flush_insert_into_flush_list(block);
+ } else {
+ ut_ad(ut_dulint_cmp(block->oldest_modification,
+ mtr->start_lsn) <= 0);
+ }
+}
+
+/************************************************************************
+This function should be called when recovery has modified a buffer page. */
+UNIV_INLINE
+void
+buf_flush_recv_note_modification(
+/*=============================*/
+ buf_block_t* block, /* in: block which is modified */
+ dulint start_lsn, /* in: start lsn of the first mtr in a
+ set of mtr's */
+ dulint end_lsn) /* in: end lsn of the last mtr in the
+ set of mtr's */
+{
+ ut_ad(block);
+ ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
+ ut_ad(block->buf_fix_count > 0);
+ ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
+
+ mutex_enter(&(buf_pool->mutex));
+
+ ut_ad(ut_dulint_cmp(block->newest_modification, end_lsn) <= 0);
+
+ block->newest_modification = end_lsn;
+
+ if (ut_dulint_is_zero(block->oldest_modification)) {
+
+ block->oldest_modification = start_lsn;
+
+ ut_ad(!ut_dulint_is_zero(block->oldest_modification));
+
+ buf_flush_insert_sorted_into_flush_list(block);
+ } else {
+ ut_ad(ut_dulint_cmp(block->oldest_modification,
+ start_lsn) <= 0);
+ }
+
+ mutex_exit(&(buf_pool->mutex));
+}
diff --git a/innobase/include/buf0lru.h b/innobase/include/buf0lru.h
new file mode 100644
index 00000000000..946b6c4e31d
--- /dev/null
+++ b/innobase/include/buf0lru.h
@@ -0,0 +1,117 @@
+/******************************************************
+The database buffer pool LRU replacement algorithm
+
+(c) 1995 Innobase Oy
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef buf0lru_h
+#define buf0lru_h
+
+#include "univ.i"
+#include "ut0byte.h"
+#include "buf0types.h"
+
+/**********************************************************************
+Tries to remove LRU flushed blocks from the end of the LRU list and put them
+to the free list. This is beneficial for the efficiency of the insert buffer
+operation, as flushed pages from non-unique non-clustered indexes are here
+taken out of the buffer pool, and their inserts redirected to the insert
+buffer. Otherwise, the flushed blocks could get modified again before read
+operations need new buffer blocks, and the i/o work done in flushing would be
+wasted. */
+
+void
+buf_LRU_try_free_flushed_blocks(void);
+/*==================================*/
+
+/*#######################################################################
+These are low-level functions
+#########################################################################*/
+
+/* Minimum LRU list length for which the LRU_old pointer is defined */
+
+#define BUF_LRU_OLD_MIN_LEN 80
+
+#define BUF_LRU_FREE_SEARCH_LEN (5 + 2 * BUF_READ_AHEAD_AREA)
+
+/**********************************************************************
+Gets the minimum LRU_position field for the blocks in an initial segment
+(determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not
+guaranteed to be precise, because the ulint_clock may wrap around. */
+
+ulint
+buf_LRU_get_recent_limit(void);
+/*==========================*/
+ /* out: the limit; zero if could not determine it */
+/**********************************************************************
+Returns a free block from the buf_pool. The block is taken off the
+free list. If the free list is empty, blocks are moved from the end of the
+LRU list to the free list. */
+
+buf_block_t*
+buf_LRU_get_free_block(void);
+/*=========================*/
+ /* out: the free control block */
+/**********************************************************************
+Puts a block back to the free list. */
+
+void
+buf_LRU_block_free_non_file_page(
+/*=============================*/
+ buf_block_t* block); /* in: block, must not contain a file page */
+/**********************************************************************
+Adds a block to the LRU list. */
+
+void
+buf_LRU_add_block(
+/*==============*/
+ buf_block_t* block, /* in: control block */
+ ibool old); /* in: TRUE if should be put to the old
+ blocks in the LRU list, else put to the
+ start; if the LRU list is very short, added to
+ the start regardless of this parameter */
+/**********************************************************************
+Moves a block to the start of the LRU list. */
+
+void
+buf_LRU_make_block_young(
+/*=====================*/
+ buf_block_t* block); /* in: control block */
+/**********************************************************************
+Moves a block to the end of the LRU list. */
+
+void
+buf_LRU_make_block_old(
+/*===================*/
+ buf_block_t* block); /* in: control block */
+/**********************************************************************
+Looks for a replaceable block from the end of the LRU list and puts it to
+the free list if found. */
+
+ibool
+buf_LRU_search_and_free_block(
+/*==========================*/
+ /* out: TRUE if freed */
+ ulint n_iterations); /* in: how many times this has been called
+ repeatedly without result: a high value
+ means that we should search farther */
+/**************************************************************************
+Validates the LRU list. */
+
+ibool
+buf_LRU_validate(void);
+/*==================*/
+/**************************************************************************
+Prints the LRU list. */
+
+void
+buf_LRU_print(void);
+/*===============*/
+
+#ifndef UNIV_NONINL
+#include "buf0lru.ic"
+#endif
+
+#endif
diff --git a/innobase/include/buf0lru.ic b/innobase/include/buf0lru.ic
new file mode 100644
index 00000000000..7b8ee457b0b
--- /dev/null
+++ b/innobase/include/buf0lru.ic
@@ -0,0 +1,8 @@
+/******************************************************
+The database buffer replacement algorithm
+
+(c) 1995 Innobase Oy
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
diff --git a/innobase/include/buf0rea.h b/innobase/include/buf0rea.h
new file mode 100644
index 00000000000..1efe67369ab
--- /dev/null
+++ b/innobase/include/buf0rea.h
@@ -0,0 +1,98 @@
+/******************************************************
+The database buffer read
+
+(c) 1995 Innobase Oy
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef buf0rea_h
+#define buf0rea_h
+
+#include "univ.i"
+#include "buf0types.h"
+
+/************************************************************************
+High-level function which reads a page asynchronously from a file to the
+buf_pool if it is not already there. Sets the io_fix flag and sets
+an exclusive lock on the buffer frame. The flag is cleared and the x-lock
+released by the i/o-handler thread. Does a random read-ahead if it seems
+sensible. */
+
+ulint
+buf_read_page(
+/*==========*/
+ /* out: number of page read requests issued: this can
+ be > 1 if read-ahead occurred */
+ ulint space, /* in: space id */
+ ulint offset);/* in: page number */
+/************************************************************************
+Applies linear read-ahead if in the buf_pool the page is a border page of
+a linear read-ahead area and all the pages in the area have been accessed.
+Does not read any page if the read-ahead mechanism is not activated. Note
+that the algorithm looks at the 'natural' adjacent successor and
+predecessor of the page, which on the leaf level of a B-tree are the next
+and previous page in the chain of leaves. To know these, the page specified
+in (space, offset) must already be present in the buf_pool. Thus, the
+natural way to use this function is to call it when a page in the buf_pool
+is accessed the first time, calling this function just after it has been
+bufferfixed.
+NOTE 1: as this function looks at the natural predecessor and successor
+fields on the page, what happens if these are not initialized to any
+sensible value? No problem: before applying read-ahead we check that the
+area to read is within the span of the space; if not, read-ahead is not
+applied. An uninitialized value may result in a useless read operation, but
+only very improbably.
+NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
+function must be written such that it cannot end up waiting for these
+latches!
+NOTE 3: the calling thread must want access to the page given: this rule is
+set to prevent unintended read-aheads performed by ibuf routines, a situation
+which could result in a deadlock if the OS does not support asynchronous io. */
+
+ulint
+buf_read_ahead_linear(
+/*==================*/
+ /* out: number of page read requests issued */
+ ulint space, /* in: space id */
+ ulint offset);/* in: page number of a page; NOTE: the current thread
+ must want access to this page (see NOTE 3 above) */
+/************************************************************************
+Issues read requests for pages which the ibuf module wants to read in, in
+order to contract insert buffer trees. Technically, this function is like
+a read-ahead function. */
+
+void
+buf_read_ibuf_merge_pages(
+/*======================*/
+ ibool sync, /* in: TRUE if the caller wants this function
+ to wait for the highest address page to get
+ read in, before this function returns */
+ ulint space, /* in: space id */
+ ulint* page_nos, /* in: array of page numbers to read, with
+ the highest page number last in the array */
+ ulint n_stored); /* in: number of page numbers in the array */
+/************************************************************************
+Issues read requests for pages which recovery wants to read in. */
+
+void
+buf_read_recv_pages(
+/*================*/
+ ibool sync, /* in: TRUE if the caller wants this function
+ to wait for the highest address page to get
+ read in, before this function returns */
+ ulint space, /* in: space id */
+ ulint* page_nos, /* in: array of page numbers to read, with the
+ highest page number the last in the array */
+ ulint n_stored); /* in: number of page numbers in the array */
+
+/* The size in pages of the area which the read-ahead algorithms read if
+invoked */
+
+#define BUF_READ_AHEAD_AREA ut_min(32, buf_pool->curr_size / 16)
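+
+/* Illustrative sketch (editorial, not part of the original header): one
+plausible way to delimit the read-ahead area containing a given page; the
+variable names are hypothetical and buf_read_ahead_linear above is not
+required to use exactly this formulation:
+
+	ulint	area = BUF_READ_AHEAD_AREA;
+	ulint	low  = (offset / area) * area;
+	ulint	high = low + area;
+
+A page would then be a border page of its area if offset == low or
+offset == high - 1. */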
+
+/* Modes used in read-ahead */
+#define BUF_READ_IBUF_PAGES_ONLY 131
+#define BUF_READ_ANY_PAGE 132
+
+#endif
diff --git a/innobase/include/buf0types.h b/innobase/include/buf0types.h
new file mode 100644
index 00000000000..44fdfa80e73
--- /dev/null
+++ b/innobase/include/buf0types.h
@@ -0,0 +1,20 @@
+/******************************************************
+The database buffer pool global types for the directory
+
+(c) 1995 Innobase Oy
+
+Created 11/17/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef buf0types_h
+#define buf0types_h
+
+typedef struct buf_block_struct buf_block_t;
+typedef struct buf_pool_struct buf_pool_t;
+
+/* The 'type' used for a buffer frame */
+typedef byte buf_frame_t;
+
+
+#endif
+
diff --git a/innobase/include/com0com.h b/innobase/include/com0com.h
new file mode 100644
index 00000000000..6f04b6a3f11
--- /dev/null
+++ b/innobase/include/com0com.h
@@ -0,0 +1,125 @@
+/******************************************************
+The communication primitives
+
+(c) 1995 Innobase Oy
+
+Created 9/23/1995 Heikki Tuuri
+*******************************************************/
+
+/* This module defines a standard datagram communication
+function interface for use in the database. We assume that
+the communication medium is reliable. */
+
+#ifndef com0com_h
+#define com0com_h
+
+#include "univ.i"
+
+/* The communications endpoint type definition */
+typedef struct com_endpoint_struct com_endpoint_t;
+
+/* Possible endpoint communication types */
+#define COM_SHM 1 /* communication through shared memory */
+
+/* Option numbers for endpoint */
+#define COM_OPT_MAX_DGRAM_SIZE 1
+
+/* Error numbers */
+#define COM_ERR_NOT_SPECIFIED 1
+#define COM_ERR_NOT_BOUND 2
+#define COM_ERR_ALREADY_BOUND 3
+#define COM_ERR_MAX_DATAGRAM_SIZE_NOT_SET 4
+#define COM_ERR_DATA_BUFFER_TOO_SMALL 5
+#define COM_ERR_ADDR_BUFFER_TOO_SMALL 6
+#define COM_ERR_DATA_TOO_LONG 7
+#define COM_ERR_ADDR_TOO_LONG 8
+#define COM_ERR_DGRAM_NOT_DELIVERED 9
+
+/* Maximum allowed address length in bytes */
+#define COM_MAX_ADDR_LEN 100
+
+/*************************************************************************
+Creates a communications endpoint. */
+
+com_endpoint_t*
+com_endpoint_create(
+/*================*/
+ /* out, own: communications endpoint, NULL if
+ did not succeed */
+ ulint type); /* in: communication type of endpoint:
+ only COM_SHM supported */
+/*************************************************************************
+Frees a communications endpoint. */
+
+ulint
+com_endpoint_free(
+/*==============*/
+				/* out: 0 if succeed, else error number */
+ com_endpoint_t* ep); /* in, own: communications endpoint */
+/*************************************************************************
+Sets an option, like the maximum datagram size for an endpoint.
+The options may vary depending on the endpoint type. */
+
+ulint
+com_endpoint_set_option(
+/*====================*/
+ /* out: 0 if succeed, else error number */
+ com_endpoint_t* ep, /* in: endpoint */
+ ulint optno, /* in: option number, only
+ COM_OPT_MAX_DGRAM_SIZE currently supported */
+ byte* optval, /* in: pointer to a buffer containing the
+ option value to set */
+ ulint optlen);/* in: option value buffer length */
+/*************************************************************************
+Binds a communications endpoint to a specified address. */
+
+ulint
+com_bind(
+/*=====*/
+ /* out: 0 if succeed, else error number */
+ com_endpoint_t* ep, /* in: communications endpoint */
+ char* name, /* in: address name */
+ ulint len); /* in: name length */
+/*************************************************************************
+Waits for a datagram to arrive at an endpoint. */
+
+ulint
+com_recvfrom(
+/*=========*/
+ /* out: 0 if succeed, else error number */
+ com_endpoint_t* ep, /* in: communications endpoint */
+ byte* buf, /* out: datagram buffer; the buffer must be
+ supplied by the caller */
+ ulint buf_len,/* in: datagram buffer length */
+ ulint* len, /* out: datagram length */
+ char* from, /* out: address name buffer; the buffer must be
+ supplied by the caller */
+ ulint from_len,/* in: address name buffer length */
+ ulint* addr_len);/* out: address name length */
+/*************************************************************************
+Sends a datagram to a specified destination. */
+
+ulint
+com_sendto(
+/*=======*/
+ /* out: 0 if succeed, else error number */
+ com_endpoint_t* ep, /* in: communications endpoint */
+ byte* buf, /* in: datagram buffer */
+ ulint len, /* in: datagram length */
+ char* to, /* in: address name buffer */
+ ulint tolen); /* in: address name length */
+/*************************************************************************
+Gets the maximum datagram size for an endpoint. */
+
+ulint
+com_endpoint_get_max_size(
+/*======================*/
+ /* out: maximum size */
+ com_endpoint_t* ep); /* in: endpoint */
+
+
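+/* Illustrative usage sketch (editorial, not part of the original header).
+Error handling is omitted; the address name "srv_addr", the datagram size
+of 512 bytes, and the binary ulint encoding of the option value are
+assumptions made for the example only:
+
+	com_endpoint_t*	ep;
+	byte		dgram[512];
+	char		from[COM_MAX_ADDR_LEN];
+	ulint		size = 512;
+	ulint		len;
+	ulint		addr_len;
+
+	ep = com_endpoint_create(COM_SHM);
+	com_endpoint_set_option(ep, COM_OPT_MAX_DGRAM_SIZE,
+					(byte*)&size, sizeof(ulint));
+	com_bind(ep, "srv_addr", 8);
+	com_recvfrom(ep, dgram, sizeof(dgram), &len,
+					from, sizeof(from), &addr_len);
+	com_sendto(ep, dgram, len, from, addr_len);
+	com_endpoint_free(ep);
+*/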
+#ifndef UNIV_NONINL
+#include "com0com.ic"
+#endif
+
+#endif
diff --git a/innobase/include/com0com.ic b/innobase/include/com0com.ic
new file mode 100644
index 00000000000..cec1cb190cc
--- /dev/null
+++ b/innobase/include/com0com.ic
@@ -0,0 +1,7 @@
+/******************************************************
+The communication primitives
+
+(c) 1995 Innobase Oy
+
+Created 9/23/1995 Heikki Tuuri
+*******************************************************/
diff --git a/innobase/include/com0shm.h b/innobase/include/com0shm.h
new file mode 100644
index 00000000000..7de9c4ac2de
--- /dev/null
+++ b/innobase/include/com0shm.h
@@ -0,0 +1,103 @@
+/******************************************************
+The communication through shared memory
+
+(c) 1995 Innobase Oy
+
+Created 9/23/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef com0shm_h
+#define com0shm_h
+
+#include "univ.i"
+
+typedef struct com_shm_endpoint_struct com_shm_endpoint_t;
+
+/* The performance of communication in NT depends on how
+many times a system call is made (excluding os_thread_yield,
+as that is the fastest way to switch threads).
+The following variable counts such events. */
+
+extern ulint com_shm_system_call_count;
+
+
+/*************************************************************************
+Creates a communications endpoint. */
+
+com_shm_endpoint_t*
+com_shm_endpoint_create(void);
+/*=========================*/
+ /* out, own: communications endpoint, NULL if
+ did not succeed */
+/*************************************************************************
+Frees a communications endpoint. */
+
+ulint
+com_shm_endpoint_free(
+/*==================*/
+				/* out: 0 if succeed, else error number */
+ com_shm_endpoint_t* ep);/* in, own: communications endpoint */
+/*************************************************************************
+Sets an option, like the maximum datagram size for an endpoint.
+The options may vary depending on the endpoint type. */
+
+ulint
+com_shm_endpoint_set_option(
+/*========================*/
+ /* out: 0 if succeed, else error number */
+ com_shm_endpoint_t* ep, /* in: endpoint */
+ ulint optno, /* in: option number, only
+ COM_OPT_MAX_DGRAM_SIZE currently supported */
+ byte* optval, /* in: pointer to a buffer containing the
+ option value to set */
+ ulint optlen);/* in: option value buffer length */
+/*************************************************************************
+Binds a communications endpoint to a specified address. */
+
+ulint
+com_shm_bind(
+/*=========*/
+ /* out: 0 if succeed, else error number */
+ com_shm_endpoint_t* ep, /* in: communications endpoint */
+ char* name, /* in: address name */
+ ulint len); /* in: address name length */
+/*************************************************************************
+Waits for a datagram to arrive at an endpoint. */
+
+ulint
+com_shm_recvfrom(
+/*=============*/
+ /* out: 0 if succeed, else error number */
+ com_shm_endpoint_t* ep, /* in: communications endpoint */
+ byte* buf, /* out: datagram buffer; the buffer is
+ supplied by the caller */
+ ulint buf_len,/* in: datagram buffer length */
+ ulint* len, /* out: datagram length */
+ char* from, /* out: address name buffer; the buffer is
+ supplied by the caller */
+ ulint from_len,/* in: address name buffer length */
+ ulint* addr_len);/* out: address name length */
+/*************************************************************************
+Sends a datagram to the specified destination. */
+
+ulint
+com_shm_sendto(
+/*===========*/
+ /* out: 0 if succeed, else error number */
+ com_shm_endpoint_t* ep, /* in: communications endpoint */
+ byte* buf, /* in: datagram buffer */
+ ulint len, /* in: datagram length */
+ char* to, /* in: address name buffer */
+ ulint tolen); /* in: address name length */
+
+ulint
+com_shm_endpoint_get_size(
+/*======================*/
+				/* out: maximum size */
+	com_shm_endpoint_t*	ep);	/* in: endpoint */
+
+
+#ifndef UNIV_NONINL
+#include "com0shm.ic"
+#endif
+
+#endif
diff --git a/innobase/include/com0shm.ic b/innobase/include/com0shm.ic
new file mode 100644
index 00000000000..e0d3cb26f69
--- /dev/null
+++ b/innobase/include/com0shm.ic
@@ -0,0 +1,7 @@
+/******************************************************
+Communication through shared memory
+
+(c) 1995 Innobase Oy
+
+Created 9/23/1995 Heikki Tuuri
+*******************************************************/
diff --git a/innobase/include/data0data.h b/innobase/include/data0data.h
new file mode 100644
index 00000000000..d7f0986b0b6
--- /dev/null
+++ b/innobase/include/data0data.h
@@ -0,0 +1,430 @@
+/************************************************************************
+SQL data field and tuple
+
+(c) 1994-1996 Innobase Oy
+
+Created 5/30/1994 Heikki Tuuri
+*************************************************************************/
+
+#ifndef data0data_h
+#define data0data_h
+
+#include "univ.i"
+
+#include "data0types.h"
+#include "data0type.h"
+#include "mem0mem.h"
+
+/* Some non-inlined functions used in the MySQL interface: */
+void
+dfield_set_data_noninline(
+ dfield_t* field, /* in: field */
+ void* data, /* in: data */
+ ulint len); /* in: length or UNIV_SQL_NULL */
+void*
+dfield_get_data_noninline(
+ dfield_t* field); /* in: field */
+ulint
+dfield_get_len_noninline(
+ dfield_t* field); /* in: field */
+ulint
+dtuple_get_n_fields_noninline(
+ dtuple_t* tuple); /* in: tuple */
+dfield_t*
+dtuple_get_nth_field_noninline(
+ dtuple_t* tuple, /* in: tuple */
+ ulint n); /* in: index of field */
+
+/*************************************************************************
+Gets pointer to the type struct of SQL data field. */
+UNIV_INLINE
+dtype_t*
+dfield_get_type(
+/*============*/
+ /* out: pointer to the type struct */
+ dfield_t* field); /* in: SQL data field */
+/*************************************************************************
+Sets the type struct of SQL data field. */
+UNIV_INLINE
+void
+dfield_set_type(
+/*============*/
+ dfield_t* field, /* in: SQL data field */
+ dtype_t* type); /* in: pointer to data type struct */
+/*************************************************************************
+Gets pointer to the data in a field. */
+UNIV_INLINE
+void*
+dfield_get_data(
+/*============*/
+ /* out: pointer to data */
+ dfield_t* field); /* in: field */
+/*************************************************************************
+Gets length of field data. */
+UNIV_INLINE
+ulint
+dfield_get_len(
+/*===========*/
+ /* out: length of data; UNIV_SQL_NULL if
+ SQL null data */
+ dfield_t* field); /* in: field */
+/*************************************************************************
+Sets length in a field. */
+UNIV_INLINE
+void
+dfield_set_len(
+/*===========*/
+ dfield_t* field, /* in: field */
+ ulint len); /* in: length or UNIV_SQL_NULL */
+/*************************************************************************
+Sets pointer to the data and length in a field. */
+UNIV_INLINE
+void
+dfield_set_data(
+/*============*/
+ dfield_t* field, /* in: field */
+ void* data, /* in: data */
+ ulint len); /* in: length or UNIV_SQL_NULL */
+/**************************************************************************
+Writes an SQL null field full of zeros. */
+UNIV_INLINE
+void
+data_write_sql_null(
+/*================*/
+ byte* data, /* in: pointer to a buffer of size len */
+ ulint len); /* in: SQL null size in bytes */
+/*************************************************************************
+Copies the data and len fields. */
+UNIV_INLINE
+void
+dfield_copy_data(
+/*=============*/
+ dfield_t* field1, /* in: field to copy to */
+ dfield_t* field2);/* in: field to copy from */
+/*************************************************************************
+Copies a data field to another. */
+UNIV_INLINE
+void
+dfield_copy(
+/*========*/
+ dfield_t* field1, /* in: field to copy to */
+ dfield_t* field2);/* in: field to copy from */
+/*************************************************************************
+Tests if data length and content is equal for two dfields. */
+UNIV_INLINE
+ibool
+dfield_datas_are_equal(
+/*===================*/
+ /* out: TRUE if equal */
+ dfield_t* field1, /* in: field */
+ dfield_t* field2);/* in: field */
+/*************************************************************************
+Tests if dfield data length and content is equal to the given. */
+UNIV_INLINE
+ibool
+dfield_data_is_equal(
+/*=================*/
+ /* out: TRUE if equal */
+ dfield_t* field, /* in: field */
+ ulint len, /* in: data length or UNIV_SQL_NULL */
+ byte* data); /* in: data */
+/*************************************************************************
+Gets number of fields in a data tuple. */
+UNIV_INLINE
+ulint
+dtuple_get_n_fields(
+/*================*/
+ /* out: number of fields */
+ dtuple_t* tuple); /* in: tuple */
+/*************************************************************************
+Gets nth field of a tuple. */
+UNIV_INLINE
+dfield_t*
+dtuple_get_nth_field(
+/*=================*/
+ /* out: nth field */
+ dtuple_t* tuple, /* in: tuple */
+ ulint n); /* in: index of field */
+/*************************************************************************
+Gets info bits in a data tuple. */
+UNIV_INLINE
+ulint
+dtuple_get_info_bits(
+/*=================*/
+ /* out: info bits */
+ dtuple_t* tuple); /* in: tuple */
+/*************************************************************************
+Sets info bits in a data tuple. */
+UNIV_INLINE
+void
+dtuple_set_info_bits(
+/*=================*/
+ dtuple_t* tuple, /* in: tuple */
+ ulint info_bits); /* in: info bits */
+/*************************************************************************
+Gets number of fields used in record comparisons. */
+UNIV_INLINE
+ulint
+dtuple_get_n_fields_cmp(
+/*====================*/
+ /* out: number of fields used in comparisons
+ in rem0cmp.* */
+ dtuple_t* tuple); /* in: tuple */
+/*************************************************************************
+Sets number of fields used in record comparisons. */
+UNIV_INLINE
+void
+dtuple_set_n_fields_cmp(
+/*====================*/
+ dtuple_t* tuple, /* in: tuple */
+ ulint n_fields_cmp); /* in: number of fields used in
+ comparisons in rem0cmp.* */
+/**************************************************************
+Creates a data tuple in a memory heap. The default value for the number
+of fields used in record comparisons for this tuple is n_fields. */
+UNIV_INLINE
+dtuple_t*
+dtuple_create(
+/*==========*/
+ /* out, own: created tuple */
+ mem_heap_t* heap, /* in: memory heap where the tuple
+ is created */
+ ulint n_fields); /* in: number of fields */
+
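+/* The following is an illustrative sketch only, not part of the original
+interface: it builds a two-field tuple in a memory heap with the functions
+declared above. mem_heap_create() and mem_heap_free() are assumed to be
+provided by mem0mem.h, which this header includes; the field contents are
+arbitrary example values. */
+#if 0
+static
+void
+dtuple_build_example(void)
+/*======================*/
+{
+	mem_heap_t*	heap;
+	dtuple_t*	tuple;
+	dfield_t*	field;
+	ulint		id	= 42;
+
+	heap	= mem_heap_create(128);		/* initial heap size in bytes */
+	tuple	= dtuple_create(heap, 2);	/* 2 fields; n_fields_cmp == 2 */
+
+	field = dtuple_get_nth_field(tuple, 0);
+	dfield_set_data(field, (void*)"abc", 3);	/* 3 data bytes */
+
+	field = dtuple_get_nth_field(tuple, 1);
+	dfield_set_data(field, &id, sizeof(ulint));
+
+	ut_a(dtuple_get_n_fields(tuple) == 2);
+	ut_a(dfield_get_len(dtuple_get_nth_field(tuple, 0)) == 3);
+
+	mem_heap_free(heap);
+}
+#endif
+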
+/*************************************************************************
+Creates a dtuple for use in MySQL. */
+
+dtuple_t*
+dtuple_create_for_mysql(
+/*====================*/
+				/* out, own: created dtuple */
+ void** heap, /* out: created memory heap */
+ ulint n_fields); /* in: number of fields */
+/*************************************************************************
+Frees a dtuple used in MySQL. */
+
+void
+dtuple_free_for_mysql(
+/*==================*/
+	void*	heap);	/* in, own: memory heap where the dtuple
+			was created */
+/*************************************************************************
+Sets number of fields used in a tuple. Normally this is set in
+dtuple_create, but if you want later to set it smaller, you can use this. */
+
+void
+dtuple_set_n_fields(
+/*================*/
+ dtuple_t* tuple, /* in: tuple */
+ ulint n_fields); /* in: number of fields */
+/**************************************************************
+The following function returns the sum of data lengths of a tuple. The space
+occupied by the field structs or the tuple struct is not counted. */
+UNIV_INLINE
+ulint
+dtuple_get_data_size(
+/*=================*/
+ /* out: sum of data lens */
+ dtuple_t* tuple); /* in: typed data tuple */
+/****************************************************************
+Returns TRUE if lengths of two dtuples are equal and respective data fields
+in them are equal. */
+UNIV_INLINE
+ibool
+dtuple_datas_are_equal(
+/*===================*/
+ /* out: TRUE if length and datas are equal */
+ dtuple_t* tuple1, /* in: tuple 1 */
+ dtuple_t* tuple2); /* in: tuple 2 */
+/****************************************************************
+Folds a prefix given as the number of fields of a tuple. */
+UNIV_INLINE
+ulint
+dtuple_fold(
+/*========*/
+ /* out: the folded value */
+ dtuple_t* tuple, /* in: the tuple */
+ ulint n_fields,/* in: number of complete fields to fold */
+ ulint n_bytes,/* in: number of bytes to fold in an
+ incomplete last field */
+ dulint tree_id);/* in: index tree id */
+/***********************************************************************
+Sets types of fields binary in a tuple. */
+UNIV_INLINE
+void
+dtuple_set_types_binary(
+/*====================*/
+ dtuple_t* tuple, /* in: data tuple */
+ ulint n); /* in: number of fields to set */
+/**************************************************************
+Checks that a data field is typed. Asserts an error if not. */
+
+ibool
+dfield_check_typed(
+/*===============*/
+ /* out: TRUE if ok */
+ dfield_t* field); /* in: data field */
+/**************************************************************
+Checks that a data tuple is typed. Asserts an error if not. */
+
+ibool
+dtuple_check_typed(
+/*===============*/
+ /* out: TRUE if ok */
+ dtuple_t* tuple); /* in: tuple */
+/**************************************************************
+Validates the consistency of a tuple which must be complete, i.e.,
+all fields must have been set. */
+
+ibool
+dtuple_validate(
+/*============*/
+ /* out: TRUE if ok */
+ dtuple_t* tuple); /* in: tuple */
+/*****************************************************************
+Pretty prints a dfield value according to its data type. */
+
+void
+dfield_print(
+/*=========*/
+ dfield_t* dfield);/* in: dfield */
+/*****************************************************************
+Pretty prints a dfield value according to its data type. Also the hex string
+is printed if a string contains non-printable characters. */
+
+void
+dfield_print_also_hex(
+/*==================*/
+ dfield_t* dfield); /* in: dfield */
+/**************************************************************
+The following function prints the contents of a tuple. */
+
+void
+dtuple_print(
+/*=========*/
+ dtuple_t* tuple); /* in: tuple */
+/**************************************************************
+The following function prints the contents of a tuple to a buffer. */
+
+ulint
+dtuple_sprintf(
+/*===========*/
+ /* out: printed length in bytes */
+ char* buf, /* in: print buffer */
+ ulint buf_len,/* in: buf length in bytes */
+ dtuple_t* tuple); /* in: tuple */
+/***************************************************************
+Generates a random tuple. */
+
+dtuple_t*
+dtuple_gen_rnd_tuple(
+/*=================*/
+ /* out: pointer to the tuple */
+ mem_heap_t* heap); /* in: memory heap where generated */
+/*******************************************************************
+Generates a test tuple for sort and comparison tests. */
+
+void
+dtuple_gen_test_tuple(
+/*==================*/
+ dtuple_t* tuple, /* in/out: a tuple with 3 fields */
+ ulint i); /* in: a number, 0 <= i < 512 */
+/*******************************************************************
+Generates a test tuple for B-tree speed tests. */
+
+void
+dtuple_gen_test_tuple3(
+/*===================*/
+ dtuple_t* tuple, /* in/out: a tuple with 3 fields */
+ ulint i, /* in: a number < 1000000 */
+ ulint type, /* in: DTUPLE_TEST_FIXED30, ... */
+ byte* buf); /* in: a buffer of size >= 8 bytes */
+/*******************************************************************
+Generates a test tuple for B-tree speed tests. */
+
+void
+dtuple_gen_search_tuple3(
+/*=====================*/
+ dtuple_t* tuple, /* in/out: a tuple with 1 or 2 fields */
+ ulint i, /* in: a number < 1000000 */
+ byte* buf); /* in: a buffer of size >= 8 bytes */
+/*******************************************************************
+Generates a test tuple for TPC-A speed test. */
+
+void
+dtuple_gen_test_tuple_TPC_A(
+/*========================*/
+ dtuple_t* tuple, /* in/out: a tuple with >= 3 fields */
+ ulint i, /* in: a number < 10000 */
+ byte* buf); /* in: a buffer of size >= 16 bytes */
+/*******************************************************************
+Generates a test tuple for B-tree speed tests. */
+
+void
+dtuple_gen_search_tuple_TPC_A(
+/*==========================*/
+ dtuple_t* tuple, /* in/out: a tuple with 1 field */
+ ulint i, /* in: a number < 10000 */
+ byte* buf); /* in: a buffer of size >= 16 bytes */
+/*******************************************************************
+Generates a test tuple for TPC-C speed test. */
+
+void
+dtuple_gen_test_tuple_TPC_C(
+/*========================*/
+ dtuple_t* tuple, /* in/out: a tuple with >= 12 fields */
+ ulint i, /* in: a number < 100000 */
+ byte* buf); /* in: a buffer of size >= 16 bytes */
+/*******************************************************************
+Generates a test tuple for B-tree speed tests. */
+
+void
+dtuple_gen_search_tuple_TPC_C(
+/*==========================*/
+ dtuple_t* tuple, /* in/out: a tuple with 1 field */
+ ulint i, /* in: a number < 100000 */
+ byte* buf); /* in: a buffer of size >= 16 bytes */
+
+/* Types of the third field in dtuple_gen_test_tuple3 */
+#define DTUPLE_TEST_FIXED30 1
+#define DTUPLE_TEST_RND30 2
+#define DTUPLE_TEST_RND3500 3
+#define DTUPLE_TEST_FIXED2000 4
+#define DTUPLE_TEST_FIXED3 5
+
+/*######################################################################*/
+
+/* Structure for an SQL data field */
+struct dfield_struct{
+ void* data; /* pointer to data */
+ ulint len; /* data length; UNIV_SQL_NULL if SQL null */
+ dtype_t type; /* type of data */
+ ulint col_no; /* when building index entries, the column
+ number can be stored here */
+};
+
+struct dtuple_struct {
+ ulint info_bits; /* info bits of an index record:
+ default is 0; this field is used
+ if an index record is built from
+ a data tuple */
+ ulint n_fields; /* number of fields in dtuple */
+ ulint n_fields_cmp; /* number of fields which should
+ be used in comparison services
+ of rem0cmp.*; the index search
+ is performed by comparing only these
+ fields, others are ignored; the
+ default value in dtuple creation is
+ the same value as n_fields */
+ dfield_t* fields; /* fields */
+ UT_LIST_NODE_T(dtuple_t) tuple_list;
+ /* data tuples can be linked into a
+ list using this field */
+ ulint magic_n;
+};
+#define DATA_TUPLE_MAGIC_N 65478679
+
+#ifndef UNIV_NONINL
+#include "data0data.ic"
+#endif
+
+#endif
diff --git a/innobase/include/data0data.ic b/innobase/include/data0data.ic
new file mode 100644
index 00000000000..27b5552d338
--- /dev/null
+++ b/innobase/include/data0data.ic
@@ -0,0 +1,491 @@
+/************************************************************************
+SQL data field and tuple
+
+(c) 1994-1996 Innobase Oy
+
+Created 5/30/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "mem0mem.h"
+#include "ut0rnd.h"
+
+extern byte data_error;
+
+/*************************************************************************
+Gets pointer to the type struct of SQL data field. */
+UNIV_INLINE
+dtype_t*
+dfield_get_type(
+/*============*/
+ /* out: pointer to the type struct */
+ dfield_t* field) /* in: SQL data field */
+{
+ ut_ad(field);
+
+ return(&(field->type));
+}
+
+/*************************************************************************
+Sets the type struct of SQL data field. */
+UNIV_INLINE
+void
+dfield_set_type(
+/*============*/
+ dfield_t* field, /* in: SQL data field */
+ dtype_t* type) /* in: pointer to data type struct */
+{
+ ut_ad(field && type);
+
+ field->type = *type;
+}
+
+/*************************************************************************
+Gets pointer to the data in a field. */
+UNIV_INLINE
+void*
+dfield_get_data(
+/*============*/
+ /* out: pointer to data */
+ dfield_t* field) /* in: field */
+{
+ ut_ad(field);
+ ut_ad((field->len == UNIV_SQL_NULL)
+ || (field->data != &data_error));
+
+ return(field->data);
+}
+
+/*************************************************************************
+Gets length of field data. */
+UNIV_INLINE
+ulint
+dfield_get_len(
+/*===========*/
+ /* out: length of data; UNIV_SQL_NULL if
+ SQL null data */
+ dfield_t* field) /* in: field */
+{
+ ut_ad(field);
+ ut_ad((field->len == UNIV_SQL_NULL)
+ || (field->data != &data_error));
+
+ return(field->len);
+}
+
+/*************************************************************************
+Sets length in a field. */
+UNIV_INLINE
+void
+dfield_set_len(
+/*===========*/
+ dfield_t* field, /* in: field */
+ ulint len) /* in: length or UNIV_SQL_NULL */
+{
+ ut_ad(field);
+
+ field->len = len;
+}
+
+/*************************************************************************
+Sets pointer to the data and length in a field. */
+UNIV_INLINE
+void
+dfield_set_data(
+/*============*/
+ dfield_t* field, /* in: field */
+ void* data, /* in: data */
+ ulint len) /* in: length or UNIV_SQL_NULL */
+{
+ ut_ad(field);
+
+ field->data = data;
+ field->len = len;
+}
+
+/*************************************************************************
+Copies the data and len fields. */
+UNIV_INLINE
+void
+dfield_copy_data(
+/*=============*/
+ dfield_t* field1, /* in: field to copy to */
+ dfield_t* field2) /* in: field to copy from */
+{
+ ut_ad(field1 && field2);
+
+ field1->data = field2->data;
+ field1->len = field2->len;
+}
+
+/*************************************************************************
+Copies a data field to another. */
+UNIV_INLINE
+void
+dfield_copy(
+/*========*/
+ dfield_t* field1, /* in: field to copy to */
+ dfield_t* field2) /* in: field to copy from */
+{
+ *field1 = *field2;
+}
+
+/*************************************************************************
+Tests if data length and content is equal for two dfields. */
+UNIV_INLINE
+ibool
+dfield_datas_are_equal(
+/*===================*/
+ /* out: TRUE if equal */
+ dfield_t* field1, /* in: field */
+ dfield_t* field2) /* in: field */
+{
+ ulint len;
+
+ len = field1->len;
+
+ if ((len != field2->len)
+ || ((len != UNIV_SQL_NULL)
+ && (0 != ut_memcmp(field1->data, field2->data, len)))) {
+
+ return(FALSE);
+ }
+
+ return(TRUE);
+}
+
+/*************************************************************************
+Tests if dfield data length and content is equal to the given. */
+UNIV_INLINE
+ibool
+dfield_data_is_equal(
+/*=================*/
+ /* out: TRUE if equal */
+ dfield_t* field, /* in: field */
+ ulint len, /* in: data length or UNIV_SQL_NULL */
+ byte* data) /* in: data */
+{
+ if (len != field->len) {
+
+ return(FALSE);
+ }
+
+ if ((len != UNIV_SQL_NULL)
+ && (0 != ut_memcmp(field->data, data, len))) {
+
+ return(FALSE);
+ }
+
+ return(TRUE);
+}
+
+/*************************************************************************
+Gets info bits in a data tuple. */
+UNIV_INLINE
+ulint
+dtuple_get_info_bits(
+/*=================*/
+ /* out: info bits */
+ dtuple_t* tuple) /* in: tuple */
+{
+ ut_ad(tuple);
+
+ return(tuple->info_bits);
+}
+
+/*************************************************************************
+Sets info bits in a data tuple. */
+UNIV_INLINE
+void
+dtuple_set_info_bits(
+/*=================*/
+ dtuple_t* tuple, /* in: tuple */
+ ulint info_bits) /* in: info bits */
+{
+ ut_ad(tuple);
+
+ tuple->info_bits = info_bits;
+}
+
+/*************************************************************************
+Gets number of fields used in record comparisons. */
+UNIV_INLINE
+ulint
+dtuple_get_n_fields_cmp(
+/*====================*/
+ /* out: number of fields used in comparisons
+ in rem0cmp.* */
+ dtuple_t* tuple) /* in: tuple */
+{
+ ut_ad(tuple);
+
+ return(tuple->n_fields_cmp);
+}
+
+/*************************************************************************
+Sets number of fields used in record comparisons. */
+UNIV_INLINE
+void
+dtuple_set_n_fields_cmp(
+/*====================*/
+ dtuple_t* tuple, /* in: tuple */
+ ulint n_fields_cmp) /* in: number of fields used in
+ comparisons in rem0cmp.* */
+{
+ ut_ad(tuple);
+ ut_ad(n_fields_cmp <= tuple->n_fields);
+
+ tuple->n_fields_cmp = n_fields_cmp;
+}
+
+/*************************************************************************
+Gets number of fields in a data tuple. */
+UNIV_INLINE
+ulint
+dtuple_get_n_fields(
+/*================*/
+ /* out: number of fields */
+ dtuple_t* tuple) /* in: tuple */
+{
+ ut_ad(tuple);
+
+ return(tuple->n_fields);
+}
+
+/*************************************************************************
+Gets nth field of a tuple. */
+UNIV_INLINE
+dfield_t*
+dtuple_get_nth_field(
+/*=================*/
+ /* out: nth field */
+ dtuple_t* tuple, /* in: tuple */
+ ulint n) /* in: index of field */
+{
+ ut_ad(tuple);
+ ut_ad(n < tuple->n_fields);
+
+ return(tuple->fields + n);
+}
+
+/**************************************************************
+Creates a data tuple in a memory heap. The default value for the number
+of fields used in record comparisons for this tuple is n_fields. */
+UNIV_INLINE
+dtuple_t*
+dtuple_create(
+/*==========*/
+ /* out, own: created tuple */
+ mem_heap_t* heap, /* in: memory heap where the tuple
+ is created */
+ ulint n_fields) /* in: number of fields */
+{
+ dtuple_t* tuple;
+
+ ut_ad(heap);
+
+ tuple = (dtuple_t*) mem_heap_alloc(heap, sizeof(dtuple_t)
+ + n_fields * sizeof(dfield_t));
+ tuple->info_bits = 0;
+ tuple->n_fields = n_fields;
+ tuple->n_fields_cmp = n_fields;
+ tuple->fields = (dfield_t*)(((byte*)tuple) + sizeof(dtuple_t));
+
+#ifdef UNIV_DEBUG
+ tuple->magic_n = DATA_TUPLE_MAGIC_N;
+
+ { /* In the debug version, initialize fields to an error value */
+ ulint i;
+
+ for (i = 0; i < n_fields; i++) {
+ (tuple->fields + i)->data = &data_error;
+ dfield_get_type(tuple->fields + i)->mtype = DATA_ERROR;
+ }
+ }
+#endif
+ return(tuple);
+}
+
+/**************************************************************
+The following function returns the sum of data lengths of a tuple. The space
+occupied by the field structs or the tuple struct is not counted. */
+UNIV_INLINE
+ulint
+dtuple_get_data_size(
+/*=================*/
+ /* out: sum of data lens */
+ dtuple_t* tuple) /* in: typed data tuple */
+{
+ dfield_t* field;
+ ulint n_fields;
+ ulint len;
+ ulint i;
+ ulint sum = 0;
+
+ ut_ad(tuple);
+ ut_ad(dtuple_check_typed(tuple));
+	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
+
+ n_fields = tuple->n_fields;
+
+ for (i = 0; i < n_fields; i++) {
+ field = dtuple_get_nth_field(tuple, i);
+ len = dfield_get_len(field);
+
+ if (len == UNIV_SQL_NULL) {
+ len = dtype_get_sql_null_size(dfield_get_type(field));
+ }
+
+ sum += len;
+ }
+
+ return(sum);
+}
+
+/****************************************************************
+Returns TRUE if lengths of two dtuples are equal and respective data fields
+in them are equal. */
+UNIV_INLINE
+ibool
+dtuple_datas_are_equal(
+/*===================*/
+ /* out: TRUE if length and datas are equal */
+ dtuple_t* tuple1, /* in: tuple 1 */
+ dtuple_t* tuple2) /* in: tuple 2 */
+{
+ dfield_t* field1;
+ dfield_t* field2;
+ ulint n_fields;
+ byte* data1;
+ byte* data2;
+ ulint len1;
+ ulint len2;
+ ulint i;
+
+ ut_ad(tuple1 && tuple2);
+	ut_ad(tuple1->magic_n == DATA_TUPLE_MAGIC_N);
+	ut_ad(tuple2->magic_n == DATA_TUPLE_MAGIC_N);
+ ut_ad(dtuple_check_typed(tuple1));
+ ut_ad(dtuple_check_typed(tuple2));
+
+ n_fields = dtuple_get_n_fields(tuple1);
+
+ if (n_fields != dtuple_get_n_fields(tuple2)) {
+
+ return(FALSE);
+ }
+
+ for (i = 0; i < n_fields; i++) {
+
+ field1 = dtuple_get_nth_field(tuple1, i);
+ data1 = (byte*) dfield_get_data(field1);
+ len1 = dfield_get_len(field1);
+
+ field2 = dtuple_get_nth_field(tuple2, i);
+ data2 = (byte*) dfield_get_data(field2);
+ len2 = dfield_get_len(field2);
+
+ if (len1 != len2) {
+
+ return(FALSE);
+ }
+
+ if (len1 != UNIV_SQL_NULL) {
+ if (ut_memcmp(data1, data2, len1) != 0) {
+
+ return(FALSE);
+ }
+ }
+ }
+
+ return(TRUE);
+}
+
+/***********************************************************************
+Sets types of fields binary in a tuple. */
+UNIV_INLINE
+void
+dtuple_set_types_binary(
+/*====================*/
+ dtuple_t* tuple, /* in: data tuple */
+ ulint n) /* in: number of fields to set */
+{
+ dtype_t* dfield_type;
+ ulint i;
+
+ for (i = 0; i < n; i++) {
+ dfield_type = dfield_get_type(dtuple_get_nth_field(tuple, i));
+ dtype_set(dfield_type, DATA_BINARY, 0, 0, 0);
+ }
+}
+
+/****************************************************************
+Folds a prefix given as the number of fields of a tuple. */
+UNIV_INLINE
+ulint
+dtuple_fold(
+/*========*/
+ /* out: the folded value */
+ dtuple_t* tuple, /* in: the tuple */
+ ulint n_fields,/* in: number of complete fields to fold */
+ ulint n_bytes,/* in: number of bytes to fold in an
+ incomplete last field */
+ dulint tree_id)/* in: index tree id */
+{
+ dfield_t* field;
+ ulint i;
+ byte* data;
+ ulint len;
+ ulint fold;
+
+ ut_ad(tuple);
+	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
+ ut_ad(dtuple_check_typed(tuple));
+
+ fold = ut_fold_dulint(tree_id);
+
+ for (i = 0; i < n_fields; i++) {
+ field = dtuple_get_nth_field(tuple, i);
+
+ data = (byte*) dfield_get_data(field);
+ len = dfield_get_len(field);
+
+ if (len != UNIV_SQL_NULL) {
+ fold = ut_fold_ulint_pair(fold,
+ ut_fold_binary(data, len));
+ }
+ }
+
+ if (n_bytes > 0) {
+ field = dtuple_get_nth_field(tuple, i);
+
+ data = (byte*) dfield_get_data(field);
+ len = dfield_get_len(field);
+
+ if (len != UNIV_SQL_NULL) {
+ if (len > n_bytes) {
+ len = n_bytes;
+ }
+
+ fold = ut_fold_ulint_pair(fold,
+ ut_fold_binary(data, len));
+ }
+ }
+
+ return(fold);
+}
+
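+/* Illustrative sketch only, not part of the original file: computing the
+fold value of a one-field prefix of a typed tuple, as is done for hash
+index lookups. ut_dulint_create() is assumed to come from ut0byte.h; the
+tree id used here is an arbitrary example value. */
+#if 0
+static
+ulint
+dtuple_fold_example(
+/*================*/
+				/* out: fold value of the first field */
+	dtuple_t*	tuple)	/* in: tuple with at least one typed field */
+{
+	/* Fold one complete field and no bytes of an incomplete field */
+
+	return(dtuple_fold(tuple, 1, 0, ut_dulint_create(0, 5)));
+}
+#endif
+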
+/**************************************************************************
+Writes an SQL null field full of zeros. */
+UNIV_INLINE
+void
+data_write_sql_null(
+/*================*/
+ byte* data, /* in: pointer to a buffer of size len */
+ ulint len) /* in: SQL null size in bytes */
+{
+ ulint j;
+
+ for (j = 0; j < len; j++) {
+ data[j] = '\0';
+ }
+}
diff --git a/innobase/include/data0type.h b/innobase/include/data0type.h
new file mode 100644
index 00000000000..4817f0ca839
--- /dev/null
+++ b/innobase/include/data0type.h
@@ -0,0 +1,214 @@
+/******************************************************
+Data types
+
+(c) 1996 Innobase Oy
+
+Created 1/16/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef data0type_h
+#define data0type_h
+
+#include "univ.i"
+
+/* SQL data type struct */
+typedef struct dtype_struct dtype_t;
+
+/* This variable is initialized as the standard binary variable length
+data type */
+extern dtype_t* dtype_binary;
+
+/* Data main types of SQL data; NOTE! character data types requiring
+collation transformation must have the smallest codes! All codes must be
+less than 256! */
+#define DATA_VARCHAR 1 /* character varying */
+#define DATA_CHAR 2 /* fixed length character */
+#define DATA_FIXBINARY 3 /* binary string of fixed length */
+#define DATA_BINARY 4 /* binary string */
+#define DATA_BLOB 5 /* binary large object */
+#define DATA_INT 6 /* integer: can be any size 1 - 8 bytes */
+#define DATA_SYS_CHILD 7 /* address of the child page in node pointer */
+#define DATA_SYS 8 /* system column */
+/* Data types >= DATA_FLOAT must be compared using the whole field, not as
+binary strings */
+#define DATA_FLOAT 9
+#define DATA_DOUBLE 10
+#define DATA_DECIMAL 11 /* decimal number stored as an ASCII string */
+#define DATA_VARMYSQL 12 /* data types for which comparisons must be */
+#define DATA_MYSQL 13 /* made by MySQL */
+#define DATA_ERROR 111 /* error value */
+#define DATA_MTYPE_MAX 255
+/*-------------------------------------------*/
+/* Precise data types for system columns; NOTE: the values must run
+from 0 up in the order given! All codes must be less than 256! */
+#define DATA_ROW_ID 0 /* row id: a dulint */
+#define DATA_ROW_ID_LEN 6 /* stored length for row id */
+#define DATA_TRX_ID 1 /* transaction id: 6 bytes */
+#define DATA_TRX_ID_LEN 6
+#define DATA_ROLL_PTR 2 /* rollback data pointer: 7 bytes */
+#define DATA_ROLL_PTR_LEN 7
+#define DATA_MIX_ID 3 /* mixed index label: a dulint, stored in
+ a row in a compressed form */
+#define DATA_MIX_ID_LEN 9 /* maximum stored length for mix id (in a
+ compressed dulint form) */
+#define DATA_N_SYS_COLS 4 /* number of system columns defined above */
+#define DATA_NOT_NULL 256 /* this is ORed to the precise type when
+ the column is declared as NOT NULL */
+#define DATA_UNSIGNED	512	/* this is ORed to the precise type when
+ we have an unsigned integer type */
+/*-------------------------------------------*/
+
+/* Precise types of a char or varchar data. All codes must be less than 256! */
+#define DATA_ENGLISH 4 /* English language character string */
+#define DATA_FINNISH 5 /* Finnish */
+#define DATA_PRTYPE_MAX 255
+
+/* The number of bytes we need to store the type information affecting the
+alphabetical ordering of a single field, and to decide the storage size of
+an SQL null. */
+#define DATA_ORDER_NULL_TYPE_BUF_SIZE 4
+
+/*************************************************************************
+Sets a data type structure. */
+UNIV_INLINE
+void
+dtype_set(
+/*======*/
+ dtype_t* type, /* in: type struct to init */
+ ulint mtype, /* in: main data type */
+ ulint prtype, /* in: precise type */
+ ulint len, /* in: length of type */
+ ulint prec); /* in: precision of type */
+/*************************************************************************
+Copies a data type structure. */
+UNIV_INLINE
+void
+dtype_copy(
+/*=======*/
+ dtype_t* type1, /* in: type struct to copy to */
+ dtype_t* type2); /* in: type struct to copy from */
+/*************************************************************************
+Gets the SQL main data type. */
+UNIV_INLINE
+ulint
+dtype_get_mtype(
+/*============*/
+ dtype_t* type);
+/*************************************************************************
+Gets the precise data type. */
+UNIV_INLINE
+ulint
+dtype_get_prtype(
+/*=============*/
+ dtype_t* type);
+/*************************************************************************
+Gets the type length. */
+UNIV_INLINE
+ulint
+dtype_get_len(
+/*==========*/
+ dtype_t* type);
+/*************************************************************************
+Gets the type precision. */
+UNIV_INLINE
+ulint
+dtype_get_prec(
+/*===========*/
+ dtype_t* type);
+/*************************************************************************
+Gets the padding character code for the type. */
+UNIV_INLINE
+ulint
+dtype_get_pad_char(
+/*===============*/
+ /* out: padding character code, or
+ ULINT_UNDEFINED if no padding specified */
+	dtype_t*	type);	/* in: type */
+/*************************************************************************
+Transforms the character code so that it is ordered appropriately
+for the language. */
+UNIV_INLINE
+ulint
+dtype_collate(
+/*==========*/
+				/* out: collation order position */
+ dtype_t* type, /* in: type */
+ ulint code); /* in: character code stored in database
+ record */
+/***************************************************************************
+Returns the size of a fixed size data type, 0 if not a fixed size type. */
+UNIV_INLINE
+ulint
+dtype_get_fixed_size(
+/*=================*/
+ /* out: fixed size, or 0 */
+ dtype_t* type); /* in: type */
+/***************************************************************************
+Returns a stored SQL NULL size for a type. For fixed length types it is
+the fixed length of the type, otherwise 0. */
+UNIV_INLINE
+ulint
+dtype_get_sql_null_size(
+/*====================*/
+ /* out: SQL null storage size */
+ dtype_t* type); /* in: type */
+/***************************************************************************
+Returns TRUE if a type is of a fixed size. */
+UNIV_INLINE
+ibool
+dtype_is_fixed_size(
+/*================*/
+ /* out: TRUE if fixed size */
+ dtype_t* type); /* in: type */
+/**************************************************************************
+Stores to a type the information which determines its alphabetical
+ordering. */
+UNIV_INLINE
+void
+dtype_store_for_order_and_null_size(
+/*================================*/
+ byte* buf, /* in: buffer for DATA_ORDER_NULL_TYPE_BUF_SIZE
+ bytes */
+ dtype_t* type); /* in: type struct */
+/**************************************************************************
+Reads from a buffer the stored information which determines the alphabetical
+ordering of a type. */
+UNIV_INLINE
+void
+dtype_read_for_order_and_null_size(
+/*===============================*/
+	dtype_t*	type,	/* out: type struct */
+ byte* buf); /* in: buffer for type order info */
+/*************************************************************************
+Validates a data type structure. */
+
+ibool
+dtype_validate(
+/*===========*/
+ /* out: TRUE if ok */
+ dtype_t* type); /* in: type struct to validate */
+/*************************************************************************
+Prints a data type structure. */
+
+void
+dtype_print(
+/*========*/
+ dtype_t* type); /* in: type */
+
+/* Structure for an SQL data type */
+
+struct dtype_struct{
+ ulint mtype; /* main data type */
+ ulint prtype; /* precise type; MySQL data type */
+
+ /* remaining two fields do not affect alphabetical ordering: */
+
+ ulint len; /* length */
+ ulint prec; /* precision */
+};
+
+#ifndef UNIV_NONINL
+#include "data0type.ic"
+#endif
+
+#endif
diff --git a/innobase/include/data0type.ic b/innobase/include/data0type.ic
new file mode 100644
index 00000000000..ca93c6a5383
--- /dev/null
+++ b/innobase/include/data0type.ic
@@ -0,0 +1,248 @@
+/******************************************************
+Data types
+
+(c) 1996 Innobase Oy
+
+Created 1/16/1996 Heikki Tuuri
+*******************************************************/
+
+#include "mach0data.h"
+
+/*************************************************************************
+Sets a data type structure. */
+UNIV_INLINE
+void
+dtype_set(
+/*======*/
+ dtype_t* type, /* in: type struct to init */
+ ulint mtype, /* in: main data type */
+ ulint prtype, /* in: precise type */
+ ulint len, /* in: length of type */
+ ulint prec) /* in: precision of type */
+{
+ ut_ad(type);
+ ut_ad(mtype <= DATA_MTYPE_MAX);
+
+ type->mtype = mtype;
+ type->prtype = prtype;
+ type->len = len;
+ type->prec = prec;
+
+ ut_ad(dtype_validate(type));
+}
+
+/*************************************************************************
+Copies a data type structure. */
+UNIV_INLINE
+void
+dtype_copy(
+/*=======*/
+ dtype_t* type1, /* in: type struct to copy to */
+ dtype_t* type2) /* in: type struct to copy from */
+{
+ *type1 = *type2;
+
+ ut_ad(dtype_validate(type1));
+}
+
+/*************************************************************************
+Gets the SQL main data type. */
+UNIV_INLINE
+ulint
+dtype_get_mtype(
+/*============*/
+ dtype_t* type)
+{
+ ut_ad(type);
+
+ return(type->mtype);
+}
+
+/*************************************************************************
+Gets the precise data type. */
+UNIV_INLINE
+ulint
+dtype_get_prtype(
+/*=============*/
+ dtype_t* type)
+{
+ ut_ad(type);
+
+ return(type->prtype);
+}
+
+/*************************************************************************
+Gets the type length. */
+UNIV_INLINE
+ulint
+dtype_get_len(
+/*==========*/
+ dtype_t* type)
+{
+ ut_ad(type);
+
+ return(type->len);
+}
+
+/*************************************************************************
+Gets the type precision. */
+UNIV_INLINE
+ulint
+dtype_get_prec(
+/*===========*/
+ dtype_t* type)
+{
+ ut_ad(type);
+
+ return(type->prec);
+}
+
+/*************************************************************************
+Gets the padding character code for the type. */
+UNIV_INLINE
+ulint
+dtype_get_pad_char(
+/*===============*/
+ /* out: padding character code, or
+ ULINT_UNDEFINED if no padding specified */
+ dtype_t* type) /* in: type */
+{
+ if (type->mtype == DATA_CHAR) {
+ /* space is the padding character for all char strings */
+
+ return((ulint)' ');
+ }
+
+ ut_ad((type->mtype == DATA_BINARY) || (type->mtype == DATA_VARCHAR));
+
+ /* No padding specified */
+
+ return(ULINT_UNDEFINED);
+}
+
+/*************************************************************************
+Transforms the character code so that it is ordered appropriately for the
+language. */
+UNIV_INLINE
+ulint
+dtype_collate(
+/*==========*/
+ /* out: collation order position */
+ dtype_t* type, /* in: type */
+ ulint code) /* in: character code stored in database
+ record */
+{
+ ut_ad((type->mtype == DATA_CHAR) || (type->mtype == DATA_VARCHAR));
+
+ return(toupper(code));
+}
+
+/**************************************************************************
+Stores to a type the information which determines its alphabetical
+ordering. */
+UNIV_INLINE
+void
+dtype_store_for_order_and_null_size(
+/*================================*/
+ byte* buf, /* in: buffer for DATA_ORDER_NULL_TYPE_BUF_SIZE
+ bytes */
+ dtype_t* type) /* in: type struct */
+{
+ ut_ad(4 == DATA_ORDER_NULL_TYPE_BUF_SIZE);
+ ut_ad(type->prtype < 256);
+
+ buf[0] = (byte)(type->mtype & 0xFF);
+ buf[1] = (byte)(type->prtype & 0xFF);
+
+ mach_write_to_2(buf + 2, type->len & 0xFFFF);
+}
+
+/**************************************************************************
+Reads from a buffer the stored information which determines the alphabetical
+ordering of a type. */
+UNIV_INLINE
+void
+dtype_read_for_order_and_null_size(
+/*===============================*/
+	dtype_t*	type,	/* out: type struct */
+ byte* buf) /* in: buffer for type order info */
+{
+ ut_ad(4 == DATA_ORDER_NULL_TYPE_BUF_SIZE);
+
+ type->mtype = buf[0];
+ type->prtype = buf[1];
+
+ type->len = mach_read_from_2(buf + 2);
+}
+
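+/* Illustrative sketch only, not part of the original file: the 4-byte
+buffer written by dtype_store_for_order_and_null_size() can be read back
+with dtype_read_for_order_and_null_size(); the precision is not stored,
+as it does not affect the alphabetical ordering. The concrete type used
+below is an arbitrary example. */
+#if 0
+static
+void
+dtype_order_info_example(void)
+/*==========================*/
+{
+	byte	buf[DATA_ORDER_NULL_TYPE_BUF_SIZE];
+	dtype_t	type1;
+	dtype_t	type2;
+
+	dtype_set(&type1, DATA_VARCHAR, DATA_ENGLISH, 20, 0);
+
+	dtype_store_for_order_and_null_size(buf, &type1);
+	dtype_read_for_order_and_null_size(&type2, buf);
+
+	ut_a(dtype_get_mtype(&type2) == DATA_VARCHAR);
+	ut_a(dtype_get_prtype(&type2) == DATA_ENGLISH);
+	ut_a(dtype_get_len(&type2) == 20);
+}
+#endif
+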
+/***************************************************************************
+Returns the size of a fixed size data type, 0 if not a fixed size type. */
+UNIV_INLINE
+ulint
+dtype_get_fixed_size(
+/*=================*/
+ /* out: fixed size, or 0 */
+ dtype_t* type) /* in: type */
+{
+ ulint mtype;
+
+ mtype = dtype_get_mtype(type);
+
+ switch (mtype) {
+ case DATA_CHAR:
+ case DATA_FIXBINARY:
+ case DATA_INT:
+ case DATA_FLOAT:
+ case DATA_DOUBLE:
+ case DATA_MYSQL:
+ return(dtype_get_len(type));
+
+	case DATA_SYS:
+		if (type->prtype == DATA_ROW_ID) {
+			return(DATA_ROW_ID_LEN);
+		} else {
+			return(0);
+		}
+ case DATA_VARCHAR:
+ case DATA_BINARY:
+ case DATA_DECIMAL:
+ case DATA_VARMYSQL:
+ case DATA_BLOB:
+ return(0);
+ default: ut_a(0);
+ }
+
+ return(0);
+}
+
+/***************************************************************************
+Returns a stored SQL NULL size for a type. For fixed length types it is
+the fixed length of the type, otherwise 0. */
+UNIV_INLINE
+ulint
+dtype_get_sql_null_size(
+/*====================*/
+ /* out: SQL null storage size */
+ dtype_t* type) /* in: type */
+{
+ return(dtype_get_fixed_size(type));
+}
+
+/***************************************************************************
+Returns TRUE if a type is of a fixed size. */
+UNIV_INLINE
+ibool
+dtype_is_fixed_size(
+/*================*/
+ /* out: TRUE if fixed size */
+ dtype_t* type) /* in: type */
+{
+ ulint size;
+
+ size = dtype_get_fixed_size(type);
+
+ if (size) {
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
diff --git a/innobase/include/data0types.h b/innobase/include/data0types.h
new file mode 100644
index 00000000000..ab314f8f471
--- /dev/null
+++ b/innobase/include/data0types.h
@@ -0,0 +1,19 @@
+/************************************************************************
+Some type definitions
+
+(c) 1994-2000 Innobase Oy
+
+Created 9/21/2000 Heikki Tuuri
+*************************************************************************/
+
+#ifndef data0types_h
+#define data0types_h
+
+/* SQL data field struct */
+typedef struct dfield_struct dfield_t;
+
+/* SQL data tuple struct */
+typedef struct dtuple_struct dtuple_t;
+
+#endif
+
diff --git a/innobase/include/db0err.h b/innobase/include/db0err.h
new file mode 100644
index 00000000000..34513545faa
--- /dev/null
+++ b/innobase/include/db0err.h
@@ -0,0 +1,44 @@
+/******************************************************
+Global error codes for the database
+
+(c) 1996 Innobase Oy
+
+Created 5/24/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef db0err_h
+#define db0err_h
+
+
+#define DB_SUCCESS 10
+
+/* The following are error codes */
+#define DB_ERROR 11
+#define DB_OUT_OF_MEMORY 12
+#define DB_OUT_OF_FILE_SPACE 13
+#define DB_LOCK_WAIT 14
+#define DB_DEADLOCK 15
+#define DB_ROLLBACK 16
+#define DB_DUPLICATE_KEY 17
+#define DB_QUE_THR_SUSPENDED 18
+#define DB_MISSING_HISTORY 19 /* required history data has been
+ deleted due to lack of space in
+ rollback segment */
+#define DB_CLUSTER_NOT_FOUND 30
+#define DB_TABLE_NOT_FOUND 31
+#define DB_MUST_GET_MORE_FILE_SPACE 32 /* the database has to be stopped
+					and restarted with more file space */
+#define DB_TABLE_IS_BEING_USED 33
+#define DB_TOO_BIG_RECORD 34 /* a record in an index would become
+ bigger than 1/2 free space in a page
+ frame */
+
+/* The following are partial failure codes */
+#define DB_FAIL 1000
+#define DB_OVERFLOW 1001
+#define DB_UNDERFLOW 1002
+#define DB_STRONG_FAIL 1003
+#define DB_RECORD_NOT_FOUND 1500
+#define DB_END_OF_INDEX 1501
+
+#endif
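+/* Illustrative sketch only, not part of the original interface: a
+hypothetical helper showing the intended use of the code ranges above.
+DB_SUCCESS means success, codes below DB_FAIL are genuine errors, and
+codes from DB_FAIL upwards report partial failures or not-found
+conditions that callers typically handle locally. ibool, TRUE and FALSE
+are assumed to come from univ.i. */
+#if 0
+static
+ibool
+db_err_is_hard_error(
+/*=================*/
+			/* out: TRUE if err is a genuine error code */
+	ulint	err)	/* in: error code from the list above */
+{
+	if (err == DB_SUCCESS || err >= DB_FAIL) {
+
+		return(FALSE);
+	}
+
+	return(TRUE);
+}
+#endif
+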
diff --git a/innobase/include/dict0boot.h b/innobase/include/dict0boot.h
new file mode 100644
index 00000000000..71180439913
--- /dev/null
+++ b/innobase/include/dict0boot.h
@@ -0,0 +1,132 @@
+/******************************************************
+Data dictionary creation and booting
+
+(c) 1996 Innobase Oy
+
+Created 4/18/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dict0boot_h
+#define dict0boot_h
+
+#include "univ.i"
+
+#include "mtr0mtr.h"
+#include "mtr0log.h"
+#include "ut0byte.h"
+#include "buf0buf.h"
+#include "fsp0fsp.h"
+#include "dict0dict.h"
+
+typedef byte dict_hdr_t;
+
+/**************************************************************************
+Gets a pointer to the dictionary header and x-latches its page. */
+UNIV_INLINE
+dict_hdr_t*
+dict_hdr_get(
+/*=========*/
+ /* out: pointer to the dictionary header,
+ page x-latched */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************************
+Returns a new table, index, or tree id. */
+UNIV_INLINE
+dulint
+dict_hdr_get_new_id(
+/*================*/
+ /* out: the new id */
+	ulint	type);	/* in: DICT_HDR_TABLE_ID, ... */
+/**************************************************************************
+Returns a new row id. */
+UNIV_INLINE
+dulint
+dict_sys_get_new_row_id(void);
+/*=========================*/
+ /* out: the new id */
+/**************************************************************************
+Reads a row id from a record or other 6-byte stored form. */
+UNIV_INLINE
+dulint
+dict_sys_read_row_id(
+/*=================*/
+ /* out: row id */
+ byte* field); /* in: record field */
+/**************************************************************************
+Writes a row id to a record or other 6-byte stored form. */
+UNIV_INLINE
+void
+dict_sys_write_row_id(
+/*==================*/
+ byte* field, /* in: record field */
+ dulint row_id);/* in: row id */
+/*********************************************************************
+Initializes the data dictionary memory structures when the database is
+started. This function is also called when the data dictionary is created. */
+
+void
+dict_boot(void);
+/*===========*/
+/*********************************************************************
+Creates and initializes the data dictionary at the database creation. */
+
+void
+dict_create(void);
+/*=============*/
+
+
+/* Space id and page no where the dictionary header resides */
+#define DICT_HDR_SPACE 0 /* the SYSTEM tablespace */
+#define DICT_HDR_PAGE_NO FSP_DICT_HDR_PAGE_NO
+
+/* The ids for the basic system tables and their indexes */
+#define DICT_TABLES_ID ut_dulint_create(0, 1)
+#define DICT_COLUMNS_ID ut_dulint_create(0, 2)
+#define DICT_INDEXES_ID ut_dulint_create(0, 3)
+#define DICT_FIELDS_ID ut_dulint_create(0, 4)
+/* The following is a secondary index on SYS_TABLES */
+#define DICT_TABLE_IDS_ID ut_dulint_create(0, 5)
+
+#define DICT_HDR_FIRST_ID 10 /* the ids for tables etc. start
+ from this number, except for basic
+ system tables and their above defined
+ indexes; ibuf tables and indexes are
+					assigned the id
+					DICT_IBUF_ID_MIN plus the space id */
+#define DICT_IBUF_ID_MIN ut_dulint_create(0xFFFFFFFF, 0)
+
+/* The offset of the dictionary header on the page */
+#define DICT_HDR FSEG_PAGE_DATA
+
+/*-------------------------------------------------------------*/
+/* Dictionary header offsets */
+#define DICT_HDR_ROW_ID 0 /* The latest assigned row id */
+#define DICT_HDR_TABLE_ID 8 /* The latest assigned table id */
+#define DICT_HDR_INDEX_ID 16 /* The latest assigned index id */
+#define DICT_HDR_MIX_ID 24 /* The latest assigned mix id */
+#define DICT_HDR_TABLES 32 /* Root of the table index tree */
+#define DICT_HDR_TABLE_IDS	36	/* Root of the table id index tree */
+#define DICT_HDR_COLUMNS 40 /* Root of the column index tree */
+#define DICT_HDR_INDEXES 44 /* Root of the index index tree */
+#define DICT_HDR_FIELDS 48 /* Root of the index field index tree */
+
+#define DICT_HDR_FSEG_HEADER 56 /* Segment header for the tablespace
+ segment into which the dictionary
+ header is created */
+/*-------------------------------------------------------------*/
+
+/* The field numbers of the page number and space id fields in the
+SYS_INDEXES table clustered index */
+#define DICT_SYS_INDEXES_PAGE_NO_FIELD 8
+#define DICT_SYS_INDEXES_SPACE_NO_FIELD 7
+
+/* When a row id which is zero modulo this number (which must be a power of
+two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is
+updated */
+#define DICT_HDR_ROW_ID_WRITE_MARGIN 256
+
+#ifndef UNIV_NONINL
+#include "dict0boot.ic"
+#endif
+
+#endif
diff --git a/innobase/include/dict0boot.ic b/innobase/include/dict0boot.ic
new file mode 100644
index 00000000000..8f1e214701f
--- /dev/null
+++ b/innobase/include/dict0boot.ic
@@ -0,0 +1,124 @@
+/******************************************************
+Data dictionary creation and booting
+
+(c) 1996 Innobase Oy
+
+Created 4/18/1996 Heikki Tuuri
+*******************************************************/
+
+/**************************************************************************
+Writes the current value of the row id counter to the dictionary header file
+page. */
+
+void
+dict_hdr_flush_row_id(void);
+/*=======================*/
+
+
+/**************************************************************************
+Gets a pointer to the dictionary header and x-latches its page. */
+UNIV_INLINE
+dict_hdr_t*
+dict_hdr_get(
+/*=========*/
+ /* out: pointer to the dictionary header,
+ page x-latched */
+ mtr_t* mtr) /* in: mtr */
+{
+ dict_hdr_t* header;
+
+ ut_ad(mtr);
+
+ header = DICT_HDR + buf_page_get(DICT_HDR_SPACE, DICT_HDR_PAGE_NO,
+ RW_X_LATCH, mtr);
+ buf_page_dbg_add_level(header, SYNC_DICT_HEADER);
+
+ return(header);
+}
+
+/**************************************************************************
+Returns a new table, index, or tree id. */
+UNIV_INLINE
+dulint
+dict_hdr_get_new_id(
+/*================*/
+ /* out: the new id */
+	ulint	type)	/* in: DICT_HDR_TABLE_ID, ... */
+{
+ dict_hdr_t* dict_hdr;
+ dulint id;
+ mtr_t mtr;
+
+ ut_ad((type == DICT_HDR_TABLE_ID) || (type == DICT_HDR_INDEX_ID)
+ || (type == DICT_HDR_MIX_ID));
+
+ mtr_start(&mtr);
+
+ dict_hdr = dict_hdr_get(&mtr);
+
+ id = mtr_read_dulint(dict_hdr + type, MLOG_8BYTES, &mtr);
+
+ id = ut_dulint_add(id, 1);
+
+ mlog_write_dulint(dict_hdr + type, id, MLOG_8BYTES, &mtr);
+
+ mtr_commit(&mtr);
+
+ return(id);
+}
+
+/**************************************************************************
+Returns a new row id. */
+UNIV_INLINE
+dulint
+dict_sys_get_new_row_id(void)
+/*=========================*/
+ /* out: the new id */
+{
+ dulint id;
+
+ mutex_enter(&(dict_sys->mutex));
+
+ id = dict_sys->row_id;
+
+ if (0 == (ut_dulint_get_low(id) % DICT_HDR_ROW_ID_WRITE_MARGIN)) {
+
+ dict_hdr_flush_row_id();
+ }
+
+ UT_DULINT_INC(dict_sys->row_id);
+
+ mutex_exit(&(dict_sys->mutex));
+
+ return(id);
+}
+
+/**************************************************************************
+Reads a row id from a record or other 6-byte stored form. */
+UNIV_INLINE
+dulint
+dict_sys_read_row_id(
+/*=================*/
+ /* out: row id */
+ byte* field) /* in: record field */
+{
+ ut_ad(DATA_ROW_ID_LEN == 6);
+
+ return(mach_read_from_6(field));
+}
+
+/**************************************************************************
+Writes a row id to a record or other 6-byte stored form. */
+UNIV_INLINE
+void
+dict_sys_write_row_id(
+/*==================*/
+ byte* field, /* in: record field */
+ dulint row_id) /* in: row id */
+{
+ ut_ad(DATA_ROW_ID_LEN == 6);
+
+ mach_write_to_6(field, row_id);
+}
+
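+/* Illustrative sketch only, not part of the original file: a row id
+occupies DATA_ROW_ID_LEN (6) bytes in its stored form, and
+dict_sys_write_row_id() / dict_sys_read_row_id() round-trip it through
+such a buffer. dict_boot() is assumed to have been called, and
+ut_dulint_cmp() is assumed to come from ut0byte.h. */
+#if 0
+static
+void
+dict_sys_row_id_example(void)
+/*=========================*/
+{
+	byte	buf[DATA_ROW_ID_LEN];
+	dulint	id;
+
+	id = dict_sys_get_new_row_id();
+
+	dict_sys_write_row_id(buf, id);
+
+	ut_a(0 == ut_dulint_cmp(id, dict_sys_read_row_id(buf)));
+}
+#endif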
+
diff --git a/innobase/include/dict0crea.h b/innobase/include/dict0crea.h
new file mode 100644
index 00000000000..6bc31e1e722
--- /dev/null
+++ b/innobase/include/dict0crea.h
@@ -0,0 +1,140 @@
+/******************************************************
+Database object creation
+
+(c) 1996 Innobase Oy
+
+Created 1/8/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dict0crea_h
+#define dict0crea_h
+
+#include "univ.i"
+#include "dict0types.h"
+#include "dict0dict.h"
+#include "que0types.h"
+#include "row0types.h"
+#include "mtr0mtr.h"
+
+/*************************************************************************
+Creates the default clustered index for a table: the records are ordered
+by row id. */
+
+void
+dict_create_default_index(
+/*======================*/
+ dict_table_t* table, /* in: table */
+ trx_t* trx); /* in: transaction handle */
+/*************************************************************************
+Creates a table create graph. */
+
+tab_node_t*
+tab_create_graph_create(
+/*====================*/
+ /* out, own: table create node */
+ dict_table_t* table, /* in: table to create, built as a memory data
+ structure */
+ mem_heap_t* heap); /* in: heap where created */
+/*************************************************************************
+Creates an index create graph. */
+
+ind_node_t*
+ind_create_graph_create(
+/*====================*/
+ /* out, own: index create node */
+ dict_index_t* index, /* in: index to create, built as a memory data
+ structure */
+ mem_heap_t* heap); /* in: heap where created */
+/***************************************************************
+Creates a table. This is a high-level function used in SQL execution graphs. */
+
+que_thr_t*
+dict_create_table_step(
+/*===================*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+/***************************************************************
+Creates an index. This is a high-level function used in SQL execution
+graphs. */
+
+que_thr_t*
+dict_create_index_step(
+/*===================*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+/***********************************************************************
+Drops the index tree associated with a row in SYS_INDEXES table. */
+
+void
+dict_drop_index_tree(
+/*=================*/
+ rec_t* rec, /* in: record in the clustered index of SYS_INDEXES
+ table */
+ mtr_t* mtr); /* in: mtr having the latch on the record page */
+
+
+/* Table create node structure */
+
+struct tab_node_struct{
+ que_common_t common; /* node type: QUE_NODE_TABLE_CREATE */
+ dict_table_t* table; /* table to create, built as a memory data
+ structure with dict_mem_... functions */
+ ins_node_t* tab_def; /* child node which does the insert of
+ the table definition; the row to be inserted
+ is built by the parent node */
+ ins_node_t* col_def; /* child node which does the inserts of
+ the column definitions; the row to be inserted
+ is built by the parent node */
+ commit_node_t* commit_node;
+ /* child node which performs a commit after
+ a successful table creation */
+ /*----------------------*/
+ /* Local storage for this graph node */
+ ulint state; /* node execution state */
+ ulint col_no; /* next column definition to insert */
+ mem_heap_t* heap; /* memory heap used as auxiliary storage */
+};
+
+/* Table create node states */
+#define TABLE_BUILD_TABLE_DEF 1
+#define TABLE_BUILD_COL_DEF 2
+#define TABLE_COMMIT_WORK 3
+#define TABLE_ADD_TO_CACHE 4
+#define TABLE_COMPLETED 5
+
+/* Index create node struct */
+
+struct ind_node_struct{
+ que_common_t common; /* node type: QUE_NODE_INDEX_CREATE */
+ dict_index_t* index; /* index to create, built as a memory data
+ structure with dict_mem_... functions */
+ ins_node_t* ind_def; /* child node which does the insert of
+ the index definition; the row to be inserted
+ is built by the parent node */
+ ins_node_t* field_def; /* child node which does the inserts of
+ the field definitions; the row to be inserted
+ is built by the parent node */
+ commit_node_t* commit_node;
+ /* child node which performs a commit after
+ a successful index creation */
+ /*----------------------*/
+ /* Local storage for this graph node */
+ ulint state; /* node execution state */
+ dict_table_t* table; /* table which owns the index */
+ dtuple_t* ind_row;/* index definition row built */
+ ulint field_no;/* next field definition to insert */
+ mem_heap_t* heap; /* memory heap used as auxiliary storage */
+};
+
+/* Index create node states */
+#define INDEX_BUILD_INDEX_DEF 1
+#define INDEX_BUILD_FIELD_DEF 2
+#define INDEX_CREATE_INDEX_TREE 3
+#define INDEX_COMMIT_WORK 4
+#define INDEX_ADD_TO_CACHE 5
+
+#ifndef UNIV_NONINL
+#include "dict0crea.ic"
+#endif
+
+#endif
diff --git a/innobase/include/dict0crea.ic b/innobase/include/dict0crea.ic
new file mode 100644
index 00000000000..b4da2d7e03f
--- /dev/null
+++ b/innobase/include/dict0crea.ic
@@ -0,0 +1,8 @@
+/******************************************************
+Database object creation
+
+(c) 1996 Innobase Oy
+
+Created 1/8/1996 Heikki Tuuri
+*******************************************************/
+
diff --git a/innobase/include/dict0dict.h b/innobase/include/dict0dict.h
new file mode 100644
index 00000000000..b4ff9e90c75
--- /dev/null
+++ b/innobase/include/dict0dict.h
@@ -0,0 +1,677 @@
+/******************************************************
+Data dictionary system
+
+(c) 1996 Innobase Oy
+
+Created 1/8/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dict0dict_h
+#define dict0dict_h
+
+#include "univ.i"
+#include "dict0types.h"
+#include "dict0mem.h"
+#include "data0type.h"
+#include "data0data.h"
+#include "sync0sync.h"
+#include "sync0rw.h"
+#include "mem0mem.h"
+#include "rem0types.h"
+#include "btr0types.h"
+#include "ut0mem.h"
+#include "ut0lst.h"
+#include "hash0hash.h"
+#include "ut0rnd.h"
+#include "ut0byte.h"
+#include "trx0types.h"
+
+/**************************************************************************
+Inits the data dictionary module. */
+
+void
+dict_init(void);
+/*===========*/
+/**************************************************************************
+Returns a stored procedure object and memoryfixes it. */
+UNIV_INLINE
+dict_proc_t*
+dict_procedure_get(
+/*===============*/
+ /* out: procedure, NULL if does not exist */
+	char*	proc_name,	/* in: procedure name */
+ trx_t* trx); /* in: transaction handle or NULL */
+/**************************************************************************
+Adds a stored procedure object to the dictionary cache. */
+
+void
+dict_procedure_add_to_cache(
+/*========================*/
+ dict_proc_t* proc); /* in: procedure */
+/**************************************************************************
+Reserves a parsed copy of a stored procedure to execute. If there are no
+free parsed copies left at the moment, parses a new copy. Takes the copy off
+the list of copies: the copy must be returned there with
+dict_procedure_release_parsed_copy. */
+
+que_t*
+dict_procedure_reserve_parsed_copy(
+/*===============================*/
+ /* out: the query graph */
+ dict_proc_t* proc); /* in: dictionary procedure node */
+/**************************************************************************
+Releases a parsed copy of an executed stored procedure. Puts the copy to the
+list of copies. */
+
+void
+dict_procedure_release_parsed_copy(
+/*===============================*/
+ que_t* graph); /* in: query graph of a stored procedure */
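+/* A minimal usage sketch of the reserve/release pair ('proc' is assumed to
+come from dict_procedure_get):
+
+	que_t*	graph;
+
+	graph = dict_procedure_reserve_parsed_copy(proc);
+
+	... execute the query graph ...
+
+	dict_procedure_release_parsed_copy(graph);
+*/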
+/*************************************************************************
+Gets the column data type. */
+UNIV_INLINE
+dtype_t*
+dict_col_get_type(
+/*==============*/
+	dict_col_t*	col);	/* in: column */
+/*************************************************************************
+Gets the column number. */
+UNIV_INLINE
+ulint
+dict_col_get_no(
+/*============*/
+	dict_col_t*	col);	/* in: column */
+/*************************************************************************
+Gets the column position in the clustered index. */
+UNIV_INLINE
+ulint
+dict_col_get_clust_pos(
+/*===================*/
+	dict_col_t*	col);	/* in: column */
+/**************************************************************************
+Adds a table object to the dictionary cache. */
+
+void
+dict_table_add_to_cache(
+/*====================*/
+ dict_table_t* table); /* in: table */
+/**************************************************************************
+Removes a table object from the dictionary cache. */
+
+void
+dict_table_remove_from_cache(
+/*=========================*/
+ dict_table_t* table); /* in, own: table */
+/**************************************************************************
+Renames a table object. */
+
+ibool
+dict_table_rename_in_cache(
+/*=======================*/
+ /* out: TRUE if success */
+ dict_table_t* table, /* in: table */
+ char* new_name); /* in: new name */
+/**************************************************************************
+Returns a table object and memoryfixes it. NOTE! This is a high-level
+function to be used mainly from outside the 'dict' directory. Inside this
+directory dict_table_get_low is usually the appropriate function. */
+
+dict_table_t*
+dict_table_get(
+/*===========*/
+ /* out: table, NULL if does not exist */
+ char* table_name, /* in: table name */
+ trx_t* trx); /* in: transaction handle */
+/**************************************************************************
+Returns a table object, based on table id, and memoryfixes it. */
+
+dict_table_t*
+dict_table_get_on_id(
+/*=================*/
+ /* out: table, NULL if does not exist */
+ dulint table_id, /* in: table id */
+ trx_t* trx); /* in: transaction handle */
+/**************************************************************************
+Returns a table object, based on table id, and memoryfixes it. */
+UNIV_INLINE
+dict_table_t*
+dict_table_get_on_id_low(
+/*=====================*/
+ /* out: table, NULL if does not exist */
+ dulint table_id, /* in: table id */
+ trx_t* trx); /* in: transaction handle */
+/**************************************************************************
+Releases a table from being memoryfixed. Currently this has no relevance. */
+UNIV_INLINE
+void
+dict_table_release(
+/*===============*/
+ dict_table_t* table); /* in: table to be released */
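+/* A minimal usage sketch: code outside the 'dict' directory typically pairs
+dict_table_get with dict_table_release; the table name and the trx handle
+below are placeholders:
+
+	dict_table_t*	table;
+
+	table = dict_table_get("test/mytable", trx);
+
+	if (table != NULL) {
+		... use the table object ...
+
+		dict_table_release(table);
+	}
+*/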
+/**************************************************************************
+Gets a table; loads it to the dictionary cache if necessary. A low-level
+function. */
+UNIV_INLINE
+dict_table_t*
+dict_table_get_low(
+/*===============*/
+ /* out: table, NULL if not found */
+ char* table_name); /* in: table name */
+/**************************************************************************
+Returns an index object. */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_index(
+/*=================*/
+ /* out: index, NULL if does not exist */
+ dict_table_t* table, /* in: table */
+ char* name); /* in: index name */
+/**************************************************************************
+Returns an index object. */
+
+dict_index_t*
+dict_table_get_index_noninline(
+/*===========================*/
+ /* out: index, NULL if does not exist */
+ dict_table_t* table, /* in: table */
+ char* name); /* in: index name */
+/**************************************************************************
+Prints a table definition. */
+
+void
+dict_table_print(
+/*=============*/
+ dict_table_t* table); /* in: table */
+/**************************************************************************
+Prints table data when we know the table name. */
+
+void
+dict_table_print_by_name(
+/*=====================*/
+	char*	name);	/* in: table name */
+/************************************************************************
+Gets the first index on the table (the clustered index). */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_first_index(
+/*=======================*/
+ /* out: index, NULL if none exists */
+ dict_table_t* table); /* in: table */
+/************************************************************************
+Gets the first index on the table (the clustered index). */
+
+dict_index_t*
+dict_table_get_first_index_noninline(
+/*=================================*/
+ /* out: index, NULL if none exists */
+ dict_table_t* table); /* in: table */
+/************************************************************************
+Gets the next index on the table. */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_next_index(
+/*======================*/
+ /* out: index, NULL if none left */
+ dict_index_t* index); /* in: index */
+/************************************************************************
+Gets the next index on the table. */
+
+dict_index_t*
+dict_table_get_next_index_noninline(
+/*================================*/
+ /* out: index, NULL if none left */
+ dict_index_t* index); /* in: index */
+/************************************************************************
+Gets the number of user-defined columns in a table in the dictionary
+cache. */
+UNIV_INLINE
+ulint
+dict_table_get_n_user_cols(
+/*=======================*/
+ /* out: number of user-defined (e.g., not
+ ROW_ID) columns of a table */
+ dict_table_t* table); /* in: table */
+/************************************************************************
+Gets the number of system columns in a table in the dictionary cache. */
+UNIV_INLINE
+ulint
+dict_table_get_n_sys_cols(
+/*======================*/
+ /* out: number of system (e.g.,
+ ROW_ID) columns of a table */
+ dict_table_t* table); /* in: table */
+/************************************************************************
+Gets the number of all columns (also system) in a table in the dictionary
+cache. */
+UNIV_INLINE
+ulint
+dict_table_get_n_cols(
+/*==================*/
+ /* out: number of columns of a table */
+ dict_table_t* table); /* in: table */
+/************************************************************************
+Gets the nth column of a table. */
+UNIV_INLINE
+dict_col_t*
+dict_table_get_nth_col(
+/*===================*/
+ /* out: pointer to column object */
+ dict_table_t* table, /* in: table */
+ ulint pos); /* in: position of column */
+/************************************************************************
+Gets the nth column of a table. */
+
+dict_col_t*
+dict_table_get_nth_col_noninline(
+/*=============================*/
+ /* out: pointer to column object */
+ dict_table_t* table, /* in: table */
+ ulint pos); /* in: position of column */
+/************************************************************************
+Gets the given system column of a table. */
+UNIV_INLINE
+dict_col_t*
+dict_table_get_sys_col(
+/*===================*/
+ /* out: pointer to column object */
+ dict_table_t* table, /* in: table */
+ ulint sys); /* in: DATA_ROW_ID, ... */
+/************************************************************************
+Gets the given system column number of a table. */
+UNIV_INLINE
+ulint
+dict_table_get_sys_col_no(
+/*======================*/
+ /* out: column number */
+ dict_table_t* table, /* in: table */
+ ulint sys); /* in: DATA_ROW_ID, ... */
+/***********************************************************************
+Copies types of columns contained in table to tuple. */
+
+void
+dict_table_copy_types(
+/*==================*/
+ dtuple_t* tuple, /* in: data tuple */
+	dict_table_t*	table);	/* in: table */
+/**************************************************************************
+Adds an index to dictionary cache. */
+
+ibool
+dict_index_add_to_cache(
+/*====================*/
+ /* out: TRUE if success */
+ dict_table_t* table, /* in: table on which the index is */
+ dict_index_t* index); /* in, own: index; NOTE! The index memory
+ object is freed in this function! */
+/************************************************************************
+Gets the number of fields in the internal representation of an index,
+including fields added by the dictionary system. */
+UNIV_INLINE
+ulint
+dict_index_get_n_fields(
+/*====================*/
+ /* out: number of fields */
+ dict_index_t* index); /* in: an internal representation of index
+ (in the dictionary cache) */
+/************************************************************************
+Gets the number of fields in the internal representation of an index
+that uniquely determine the position of an index entry in the index, if
+we do not take multiversioning into account: in the B-tree use the value
+returned by dict_index_get_n_unique_in_tree. */
+UNIV_INLINE
+ulint
+dict_index_get_n_unique(
+/*====================*/
+ /* out: number of fields */
+ dict_index_t* index); /* in: an internal representation of index
+ (in the dictionary cache) */
+/************************************************************************
+Gets the number of fields in the internal representation of an index
+which uniquely determine the position of an index entry in the index, if
+we also take multiversioning into account. */
+UNIV_INLINE
+ulint
+dict_index_get_n_unique_in_tree(
+/*============================*/
+ /* out: number of fields */
+ dict_index_t* index); /* in: an internal representation of index
+ (in the dictionary cache) */
+/************************************************************************
+Gets the number of user-defined ordering fields in the index. In the internal
+representation we add the row id to the ordering fields to make all indexes
+unique, but this function returns the number of fields the user defined
+in the index as ordering fields. */
+UNIV_INLINE
+ulint
+dict_index_get_n_ordering_defined_by_user(
+/*======================================*/
+ /* out: number of fields */
+ dict_index_t* index); /* in: an internal representation of index
+ (in the dictionary cache) */
+/************************************************************************
+Gets the nth field of an index. */
+UNIV_INLINE
+dict_field_t*
+dict_index_get_nth_field(
+/*=====================*/
+ /* out: pointer to field object */
+ dict_index_t* index, /* in: index */
+ ulint pos); /* in: position of field */
+/************************************************************************
+Gets pointer to the nth field data type in an index. */
+UNIV_INLINE
+dtype_t*
+dict_index_get_nth_type(
+/*====================*/
+ /* out: data type */
+ dict_index_t* index, /* in: index */
+ ulint pos); /* in: position of the field */
+/************************************************************************
+Gets the column number of the nth field in an index. */
+UNIV_INLINE
+ulint
+dict_index_get_nth_col_no(
+/*======================*/
+ /* out: column number */
+ dict_index_t* index, /* in: index */
+ ulint pos); /* in: position of the field */
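+/* A minimal sketch of walking the fields of an index in the dictionary
+cache ('index' is assumed to point to a cached index object):
+
+	ulint	n_fields = dict_index_get_n_fields(index);
+	ulint	i;
+
+	for (i = 0; i < n_fields; i++) {
+		dict_field_t*	field = dict_index_get_nth_field(index, i);
+		dtype_t*	type = dict_index_get_nth_type(index, i);
+
+		... use field->name and the type ...
+	}
+*/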
+/************************************************************************
+Looks for column n in an index. */
+
+ulint
+dict_index_get_nth_col_pos(
+/*=======================*/
+ /* out: position in internal representation
+ of the index; if not contained, returns
+ ULINT_UNDEFINED */
+ dict_index_t* index, /* in: index */
+ ulint n); /* in: column number */
+/************************************************************************
+Looks for the position of column n in the clustered index. */
+
+ulint
+dict_table_get_nth_col_pos(
+/*=======================*/
+ /* out: position in internal representation
+ of the clustered index */
+ dict_table_t* table, /* in: table */
+ ulint n); /* in: column number */
+/************************************************************************
+Returns the position of a system column in an index. */
+UNIV_INLINE
+ulint
+dict_index_get_sys_col_pos(
+/*=======================*/
+ /* out: position, ULINT_UNDEFINED if not
+ contained */
+ dict_index_t* index, /* in: index */
+ ulint type); /* in: DATA_ROW_ID, ... */
+/***********************************************************************
+Copies types of fields contained in index to tuple. */
+
+void
+dict_index_copy_types(
+/*==================*/
+ dtuple_t* tuple, /* in: data tuple */
+ dict_index_t* index, /* in: index */
+ ulint n_fields); /* in: number of field types to copy */
+/************************************************************************
+Gets the value of a system column in a clustered index record. The clustered
+index must contain the system column: if the index is unique, row id is
+not contained there! */
+UNIV_INLINE
+dulint
+dict_index_rec_get_sys_col(
+/*=======================*/
+ /* out: system column value */
+ dict_index_t* index, /* in: clustered index describing the record */
+ ulint type, /* in: column type: DATA_ROLL_PTR, ... */
+ rec_t* rec); /* in: record */
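+/* A minimal sketch of reading the trx id system column from a clustered
+index record ('index' and 'rec' are assumed to be a cached clustered index
+and a user record in it):
+
+	dulint	trx_id;
+
+	trx_id = dict_index_rec_get_sys_col(index, DATA_TRX_ID, rec);
+*/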
+/*************************************************************************
+Gets the index tree where the index is stored. */
+UNIV_INLINE
+dict_tree_t*
+dict_index_get_tree(
+/*================*/
+ /* out: index tree */
+ dict_index_t* index); /* in: index */
+/*************************************************************************
+Gets the column data type. */
+UNIV_INLINE
+dtype_t*
+dict_col_get_type(
+/*==============*/
+	dict_col_t*	col);	/* in: column */
+/*************************************************************************
+Gets the field order criterion. */
+UNIV_INLINE
+ulint
+dict_field_get_order(
+/*=================*/
+	dict_field_t*	field);	/* in: field */
+/*************************************************************************
+Gets the field column. */
+UNIV_INLINE
+dict_col_t*
+dict_field_get_col(
+/*===============*/
+	dict_field_t*	field);	/* in: field */
+/**************************************************************************
+Creates an index tree struct. */
+
+dict_tree_t*
+dict_tree_create(
+/*=============*/
+ /* out, own: created tree */
+	dict_index_t*	index);	/* in: the index for which to create the tree; in the
+ case of a mixed tree, this should be the
+ index of the cluster object */
+/**************************************************************************
+Frees an index tree struct. */
+
+void
+dict_tree_free(
+/*===========*/
+ dict_tree_t* tree); /* in, own: index tree */
+/**************************************************************************
+In an index tree, finds the index corresponding to a record in the tree. */
+
+dict_index_t*
+dict_tree_find_index(
+/*=================*/
+ /* out: index */
+ dict_tree_t* tree, /* in: index tree */
+ rec_t* rec); /* in: record for which to find correct index */
+/**************************************************************************
+In an index tree, finds the index corresponding to a dtuple which is used
+in a search to a tree. */
+
+dict_index_t*
+dict_tree_find_index_for_tuple(
+/*===========================*/
+ /* out: index; NULL if the tuple does not
+ contain the mix id field in a mixed tree */
+ dict_tree_t* tree, /* in: index tree */
+ dtuple_t* tuple); /* in: tuple for which to find index */
+/***********************************************************************
+Checks if a table which is a mixed cluster member owns a record. */
+UNIV_INLINE
+ibool
+dict_is_mixed_table_rec(
+/*====================*/
+ /* out: TRUE if the record belongs to this
+ table */
+ dict_table_t* table, /* in: table in a mixed cluster */
+ rec_t* rec); /* in: user record in the clustered index */
+/**************************************************************************
+Returns an index object if it is found in the dictionary cache. */
+
+dict_index_t*
+dict_index_get_if_in_cache(
+/*=======================*/
+ /* out: index, NULL if not found */
+ dulint index_id); /* in: index id */
+/**************************************************************************
+Checks that a tuple has n_fields_cmp value in a sensible range, so that
+no comparison can occur with the page number field in a node pointer. */
+
+ibool
+dict_tree_check_search_tuple(
+/*=========================*/
+ /* out: TRUE if ok */
+ dict_tree_t* tree, /* in: index tree */
+ dtuple_t* tuple); /* in: tuple used in a search */
+/**************************************************************************
+Builds a node pointer out of a physical record and a page number. */
+
+dtuple_t*
+dict_tree_build_node_ptr(
+/*=====================*/
+ /* out, own: node pointer */
+ dict_tree_t* tree, /* in: index tree */
+ rec_t* rec, /* in: record for which to build node pointer */
+ ulint page_no,/* in: page number to put in node pointer */
+ mem_heap_t* heap); /* in: memory heap where pointer created */
+/**************************************************************************
+Copies an initial segment of a physical record, long enough to specify an
+index entry uniquely. */
+
+rec_t*
+dict_tree_copy_rec_order_prefix(
+/*============================*/
+ /* out: pointer to the prefix record */
+ dict_tree_t* tree, /* in: index tree */
+ rec_t* rec, /* in: record for which to copy prefix */
+ byte** buf, /* in/out: memory buffer for the copied prefix,
+ or NULL */
+ ulint* buf_size);/* in/out: buffer size */
+/**************************************************************************
+Builds a typed data tuple out of a physical record. */
+
+dtuple_t*
+dict_tree_build_data_tuple(
+/*=======================*/
+ /* out, own: data tuple */
+ dict_tree_t* tree, /* in: index tree */
+ rec_t* rec, /* in: record for which to build data tuple */
+ mem_heap_t* heap); /* in: memory heap where tuple created */
+/*************************************************************************
+Gets the space id of the root of the index tree. */
+UNIV_INLINE
+ulint
+dict_tree_get_space(
+/*================*/
+ /* out: space id */
+ dict_tree_t* tree); /* in: tree */
+/*************************************************************************
+Sets the space id of the root of the index tree. */
+UNIV_INLINE
+void
+dict_tree_set_space(
+/*================*/
+ dict_tree_t* tree, /* in: tree */
+ ulint space); /* in: space id */
+/*************************************************************************
+Gets the page number of the root of the index tree. */
+UNIV_INLINE
+ulint
+dict_tree_get_page(
+/*===============*/
+ /* out: page number */
+ dict_tree_t* tree); /* in: tree */
+/*************************************************************************
+Sets the page number of the root of index tree. */
+UNIV_INLINE
+void
+dict_tree_set_page(
+/*===============*/
+ dict_tree_t* tree, /* in: tree */
+ ulint page); /* in: page number */
+/*************************************************************************
+Gets the type of the index tree. */
+UNIV_INLINE
+ulint
+dict_tree_get_type(
+/*===============*/
+ /* out: type */
+ dict_tree_t* tree); /* in: tree */
+/*************************************************************************
+Gets the read-write lock of the index tree. */
+UNIV_INLINE
+rw_lock_t*
+dict_tree_get_lock(
+/*===============*/
+ /* out: read-write lock */
+ dict_tree_t* tree); /* in: tree */
+/************************************************************************
+Returns free space reserved for future updates of records. This is
+relevant only in the case of many consecutive inserts, as updates
+which make the records bigger might fragment the index. */
+UNIV_INLINE
+ulint
+dict_tree_get_space_reserve(
+/*========================*/
+ /* out: number of free bytes on page,
+ reserved for updates */
+ dict_tree_t* tree); /* in: a tree */
+/*************************************************************************
+Calculates new estimates for table and index statistics. The statistics
+are used in query optimization. */
+
+void
+dict_update_statistics(
+/*===================*/
+ dict_table_t* table); /* in: table */
+/************************************************************************
+Reserves the dictionary system mutex for MySQL. */
+
+void
+dict_mutex_enter_for_mysql(void);
+/*============================*/
+/************************************************************************
+Releases the dictionary system mutex for MySQL. */
+
+void
+dict_mutex_exit_for_mysql(void);
+/*===========================*/
+
+
+extern dict_sys_t* dict_sys; /* the dictionary system */
+
+/* Dictionary system struct */
+struct dict_sys_struct{
+ mutex_t mutex; /* mutex protecting the data
+ dictionary; protects also the
+ disk-based dictionary system tables;
+ this mutex serializes CREATE TABLE
+ and DROP TABLE, as well as reading
+ the dictionary data for a table from
+ system tables */
+ dulint row_id; /* the next row id to assign;
+ NOTE that at a checkpoint this
+ must be written to the dict system
+ header and flushed to a file; in
+ recovery this must be derived from
+ the log records */
+ hash_table_t* table_hash; /* hash table of the tables, based
+ on name */
+ hash_table_t* table_id_hash; /* hash table of the tables, based
+ on id */
+ hash_table_t* col_hash; /* hash table of the columns */
+ hash_table_t* procedure_hash; /* hash table of the stored
+ procedures */
+ UT_LIST_BASE_NODE_T(dict_table_t)
+ table_LRU; /* LRU list of tables */
+ ulint size; /* varying space in bytes occupied
+ by the data dictionary table and
+ index objects */
+ dict_table_t* sys_tables; /* SYS_TABLES table */
+ dict_table_t* sys_columns; /* SYS_COLUMNS table */
+ dict_table_t* sys_indexes; /* SYS_INDEXES table */
+ dict_table_t* sys_fields; /* SYS_FIELDS table */
+};
+
+#ifndef UNIV_NONINL
+#include "dict0dict.ic"
+#endif
+
+#endif
diff --git a/innobase/include/dict0dict.ic b/innobase/include/dict0dict.ic
new file mode 100644
index 00000000000..549a5763b44
--- /dev/null
+++ b/innobase/include/dict0dict.ic
@@ -0,0 +1,696 @@
+/**********************************************************************
+Data dictionary system
+
+(c) 1996 Innobase Oy
+
+Created 1/8/1996 Heikki Tuuri
+***********************************************************************/
+
+#include "dict0load.h"
+#include "trx0undo.h"
+#include "trx0sys.h"
+#include "rem0rec.h"
+
+/*************************************************************************
+Gets the column data type. */
+UNIV_INLINE
+dtype_t*
+dict_col_get_type(
+/*==============*/
+ dict_col_t* col)
+{
+ ut_ad(col);
+
+ return(&col->type);
+}
+
+/*************************************************************************
+Gets the column number. */
+UNIV_INLINE
+ulint
+dict_col_get_no(
+/*============*/
+ dict_col_t* col)
+{
+ ut_ad(col);
+
+ return(col->ind);
+}
+
+/*************************************************************************
+Gets the column position in the clustered index. */
+UNIV_INLINE
+ulint
+dict_col_get_clust_pos(
+/*===================*/
+ dict_col_t* col)
+{
+ ut_ad(col);
+
+ return(col->clust_pos);
+}
+
+/************************************************************************
+Gets the first index on the table (the clustered index). */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_first_index(
+/*=======================*/
+ /* out: index, NULL if none exists */
+ dict_table_t* table) /* in: table */
+{
+ ut_ad(table);
+ ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+ return(UT_LIST_GET_FIRST(table->indexes));
+}
+
+/************************************************************************
+Gets the next index on the table. */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_next_index(
+/*======================*/
+ /* out: index, NULL if none left */
+ dict_index_t* index) /* in: index */
+{
+ ut_ad(index);
+ ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+ return(UT_LIST_GET_NEXT(indexes, index));
+}
+
+/************************************************************************
+Gets the number of user-defined columns in a table in the dictionary
+cache. */
+UNIV_INLINE
+ulint
+dict_table_get_n_user_cols(
+/*=======================*/
+ /* out: number of user-defined (e.g., not
+ ROW_ID) columns of a table */
+ dict_table_t* table) /* in: table */
+{
+ ut_ad(table);
+ ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+ ut_ad(table->cached);
+
+ return(table->n_cols - DATA_N_SYS_COLS);
+}
+
+/************************************************************************
+Gets the number of system columns in a table in the dictionary cache. */
+UNIV_INLINE
+ulint
+dict_table_get_n_sys_cols(
+/*======================*/
+ /* out: number of system (e.g.,
+ ROW_ID) columns of a table */
+ dict_table_t* table) /* in: table */
+{
+ ut_ad(table);
+ ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+ ut_ad(table->cached);
+
+ return(DATA_N_SYS_COLS);
+}
+
+/************************************************************************
+Gets the number of all columns (also system) in a table in the dictionary
+cache. */
+UNIV_INLINE
+ulint
+dict_table_get_n_cols(
+/*==================*/
+ /* out: number of columns of a table */
+ dict_table_t* table) /* in: table */
+{
+ ut_ad(table);
+ ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+ ut_ad(table->cached);
+
+ return(table->n_cols);
+}
+
+/************************************************************************
+Gets the nth column of a table. */
+UNIV_INLINE
+dict_col_t*
+dict_table_get_nth_col(
+/*===================*/
+ /* out: pointer to column object */
+ dict_table_t* table, /* in: table */
+ ulint pos) /* in: position of column */
+{
+ ut_ad(table);
+ ut_ad(pos < table->n_def);
+ ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+ return((table->cols) + pos);
+}
+
+/************************************************************************
+Gets the given system column of a table. */
+UNIV_INLINE
+dict_col_t*
+dict_table_get_sys_col(
+/*===================*/
+ /* out: pointer to column object */
+ dict_table_t* table, /* in: table */
+ ulint sys) /* in: DATA_ROW_ID, ... */
+{
+ dict_col_t* col;
+
+ ut_ad(table);
+ ut_ad(sys < DATA_N_SYS_COLS);
+ ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+ col = dict_table_get_nth_col(table, table->n_cols
+ - DATA_N_SYS_COLS + sys);
+ ut_ad(col->type.mtype == DATA_SYS);
+ ut_ad(col->type.prtype == sys);
+
+ return(col);
+}
+
+/************************************************************************
+Gets the given system column number of a table. */
+UNIV_INLINE
+ulint
+dict_table_get_sys_col_no(
+/*======================*/
+ /* out: column number */
+ dict_table_t* table, /* in: table */
+ ulint sys) /* in: DATA_ROW_ID, ... */
+{
+ ut_ad(table);
+ ut_ad(sys < DATA_N_SYS_COLS);
+ ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+ return(table->n_cols - DATA_N_SYS_COLS + sys);
+}
+
+/************************************************************************
+Gets the number of fields in the internal representation of an index,
+including fields added by the dictionary system. */
+UNIV_INLINE
+ulint
+dict_index_get_n_fields(
+/*====================*/
+ /* out: number of fields */
+ dict_index_t* index) /* in: an internal representation of index
+ (in the dictionary cache) */
+{
+ ut_ad(index);
+ ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+ ut_ad(index->cached);
+
+ return(index->n_fields);
+}
+
+/************************************************************************
+Gets the number of fields in the internal representation of an index
+that uniquely determine the position of an index entry in the index, if
+we do not take multiversioning into account: in the B-tree use the value
+returned by dict_index_get_n_unique_in_tree. */
+UNIV_INLINE
+ulint
+dict_index_get_n_unique(
+/*====================*/
+ /* out: number of fields */
+ dict_index_t* index) /* in: an internal representation of index
+ (in the dictionary cache) */
+{
+ ut_ad(index);
+ ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+ ut_ad(index->cached);
+
+ return(index->n_uniq);
+}
+
+/************************************************************************
+Gets the number of fields in the internal representation of an index
+which uniquely determine the position of an index entry in the index, if
+we also take multiversioning into account. */
+UNIV_INLINE
+ulint
+dict_index_get_n_unique_in_tree(
+/*============================*/
+ /* out: number of fields */
+ dict_index_t* index) /* in: an internal representation of index
+ (in the dictionary cache) */
+{
+ ut_ad(index);
+ ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+ ut_ad(index->cached);
+
+ if (index->type & DICT_CLUSTERED) {
+
+ return(dict_index_get_n_unique(index));
+ }
+
+ return(dict_index_get_n_fields(index));
+}
+
+/************************************************************************
+Gets the number of user-defined ordering fields in the index. In the internal
+representation of clustered indexes we add the row id to the ordering fields
+to make a clustered index unique, but this function returns the number of
+fields the user defined in the index as ordering fields. */
+UNIV_INLINE
+ulint
+dict_index_get_n_ordering_defined_by_user(
+/*======================================*/
+ /* out: number of fields */
+ dict_index_t* index) /* in: an internal representation of index
+ (in the dictionary cache) */
+{
+ return(index->n_user_defined_cols);
+}
+
+/************************************************************************
+Gets the nth field of an index. */
+UNIV_INLINE
+dict_field_t*
+dict_index_get_nth_field(
+/*=====================*/
+ /* out: pointer to field object */
+ dict_index_t* index, /* in: index */
+ ulint pos) /* in: position of field */
+{
+ ut_ad(index);
+ ut_ad(pos < index->n_def);
+ ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+ return((index->fields) + pos);
+}
+
+/************************************************************************
+Returns the position of a system column in an index. */
+UNIV_INLINE
+ulint
+dict_index_get_sys_col_pos(
+/*=======================*/
+ /* out: position, ULINT_UNDEFINED if not
+ contained */
+ dict_index_t* index, /* in: index */
+ ulint type) /* in: DATA_ROW_ID, ... */
+{
+ dict_col_t* col;
+
+ ut_ad(index);
+ ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+ ut_ad(!(index->type & DICT_UNIVERSAL));
+
+ col = dict_table_get_sys_col(index->table, type);
+
+ if (index->type & DICT_CLUSTERED) {
+
+ return(col->clust_pos);
+ }
+
+ return(dict_index_get_nth_col_pos(index,
+ dict_table_get_sys_col_no(index->table, type)));
+}
+
+/************************************************************************
+Gets the value of a system column in a clustered index record. The clustered
+index must contain the system column: if the index is unique, row id is
+not contained there! */
+UNIV_INLINE
+dulint
+dict_index_rec_get_sys_col(
+/*=======================*/
+ /* out: system column value */
+ dict_index_t* index, /* in: clustered index describing the record */
+ ulint type, /* in: column type: DATA_ROLL_PTR, ... */
+ rec_t* rec) /* in: record */
+{
+ ulint pos;
+ byte* field;
+ ulint len;
+
+ ut_ad(index);
+ ut_ad(index->type & DICT_CLUSTERED);
+
+ pos = dict_index_get_sys_col_pos(index, type);
+
+ ut_ad(pos != ULINT_UNDEFINED);
+
+ field = rec_get_nth_field(rec, pos, &len);
+
+ if (type == DATA_ROLL_PTR) {
+ ut_ad(len == 7);
+
+ return(trx_read_roll_ptr(field));
+ } else if ((type == DATA_ROW_ID) || (type == DATA_MIX_ID)) {
+
+ return(mach_dulint_read_compressed(field));
+ } else {
+ ut_ad(type == DATA_TRX_ID);
+
+ return(trx_read_trx_id(field));
+ }
+}
+
+/*************************************************************************
+Gets the index tree where the index is stored. */
+UNIV_INLINE
+dict_tree_t*
+dict_index_get_tree(
+/*================*/
+ /* out: index tree */
+ dict_index_t* index) /* in: index */
+{
+ ut_ad(index);
+ ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+ return(index->tree);
+}
+
+/*************************************************************************
+Gets the field order criterion. */
+UNIV_INLINE
+ulint
+dict_field_get_order(
+/*=================*/
+ dict_field_t* field)
+{
+ ut_ad(field);
+
+ return(field->order);
+}
+
+/*************************************************************************
+Gets the field column. */
+UNIV_INLINE
+dict_col_t*
+dict_field_get_col(
+/*===============*/
+ dict_field_t* field)
+{
+ ut_ad(field);
+
+ return(field->col);
+}
+
+/************************************************************************
+Gets pointer to the nth field data type in an index. */
+UNIV_INLINE
+dtype_t*
+dict_index_get_nth_type(
+/*====================*/
+ /* out: data type */
+ dict_index_t* index, /* in: index */
+ ulint pos) /* in: position of the field */
+{
+ return(dict_col_get_type(dict_field_get_col(
+ dict_index_get_nth_field(index, pos))));
+}
+
+/************************************************************************
+Gets the column number of the nth field in an index. */
+UNIV_INLINE
+ulint
+dict_index_get_nth_col_no(
+/*======================*/
+ /* out: column number */
+ dict_index_t* index, /* in: index */
+ ulint pos) /* in: position of the field */
+{
+ return(dict_col_get_no(dict_field_get_col(
+ dict_index_get_nth_field(index, pos))));
+}
+
+/*************************************************************************
+Gets the space id of the root of the index tree. */
+UNIV_INLINE
+ulint
+dict_tree_get_space(
+/*================*/
+ /* out: space id */
+ dict_tree_t* tree) /* in: tree */
+{
+ ut_ad(tree);
+ ut_ad(tree->magic_n == DICT_TREE_MAGIC_N);
+
+ return(tree->space);
+}
+
+/*************************************************************************
+Sets the space id of the root of the index tree. */
+UNIV_INLINE
+void
+dict_tree_set_space(
+/*================*/
+ dict_tree_t* tree, /* in: tree */
+ ulint space) /* in: space id */
+{
+ ut_ad(tree);
+ ut_ad(tree->magic_n == DICT_TREE_MAGIC_N);
+
+ tree->space = space;
+}
+
+/*************************************************************************
+Gets the page number of the root of the index tree. */
+UNIV_INLINE
+ulint
+dict_tree_get_page(
+/*===============*/
+ /* out: page number */
+ dict_tree_t* tree) /* in: tree */
+{
+ ut_ad(tree);
+ ut_ad(tree->magic_n == DICT_TREE_MAGIC_N);
+
+ return(tree->page);
+}
+
+/*************************************************************************
+Sets the page number of the root of index tree. */
+UNIV_INLINE
+void
+dict_tree_set_page(
+/*===============*/
+ dict_tree_t* tree, /* in: tree */
+ ulint page) /* in: page number */
+{
+ ut_ad(tree);
+ ut_ad(tree->magic_n == DICT_TREE_MAGIC_N);
+
+ tree->page = page;
+}
+
+/*************************************************************************
+Gets the type of the index tree. */
+UNIV_INLINE
+ulint
+dict_tree_get_type(
+/*===============*/
+ /* out: type */
+ dict_tree_t* tree) /* in: tree */
+{
+ ut_ad(tree);
+ ut_ad(tree->magic_n == DICT_TREE_MAGIC_N);
+
+ return(tree->type);
+}
+
+/*************************************************************************
+Gets the read-write lock of the index tree. */
+UNIV_INLINE
+rw_lock_t*
+dict_tree_get_lock(
+/*===============*/
+ /* out: read-write lock */
+ dict_tree_t* tree) /* in: tree */
+{
+ ut_ad(tree);
+ ut_ad(tree->magic_n == DICT_TREE_MAGIC_N);
+
+ return(&(tree->lock));
+}
+
+/************************************************************************
+Returns free space reserved for future updates of records. This is
+relevant only in the case of many consecutive inserts, as updates
+which make the records bigger might fragment the index. */
+UNIV_INLINE
+ulint
+dict_tree_get_space_reserve(
+/*========================*/
+ /* out: number of free bytes on page,
+ reserved for updates */
+ dict_tree_t* tree) /* in: a tree */
+{
+ ut_ad(tree);
+
+ UT_NOT_USED(tree);
+
+ return(UNIV_PAGE_SIZE / 16);
+}
+
+/**************************************************************************
+Gets a table; loads it to the dictionary cache if necessary. A low-level
+function. */
+UNIV_INLINE
+dict_table_t*
+dict_table_get_low(
+/*===============*/
+ /* out: table, NULL if not found */
+ char* table_name) /* in: table name */
+{
+ dict_table_t* table;
+ ulint table_fold;
+
+ ut_ad(table_name);
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+
+ /* Look for the table name in the hash table */
+ table_fold = ut_fold_string(table_name);
+
+ HASH_SEARCH(name_hash, dict_sys->table_hash, table_fold, table,
+ ut_strcmp(table->name, table_name) == 0);
+ if (table == NULL) {
+ table = dict_load_table(table_name);
+ }
+
+ return(table);
+}
+
+/**************************************************************************
+Returns a stored procedure object and memoryfixes it. */
+UNIV_INLINE
+dict_proc_t*
+dict_procedure_get(
+/*===============*/
+ /* out: procedure, NULL if does not exist */
+	char*	proc_name,	/* in: procedure name */
+ trx_t* trx) /* in: transaction handle or NULL */
+{
+ dict_proc_t* proc;
+ ulint name_fold;
+
+ UT_NOT_USED(trx);
+
+ mutex_enter(&(dict_sys->mutex));
+
+	/* Look for the procedure name in the hash table */
+ name_fold = ut_fold_string(proc_name);
+
+ HASH_SEARCH(name_hash, dict_sys->procedure_hash, name_fold, proc,
+ ut_strcmp(proc->name, proc_name) == 0);
+ if (proc != NULL) {
+ proc->mem_fix++;
+ }
+
+ mutex_exit(&(dict_sys->mutex));
+
+ return(proc);
+}
+
+/**************************************************************************
+Returns a table object, based on table id, and memoryfixes it. */
+UNIV_INLINE
+dict_table_t*
+dict_table_get_on_id_low(
+/*=====================*/
+ /* out: table, NULL if does not exist */
+ dulint table_id, /* in: table id */
+ trx_t* trx) /* in: transaction handle */
+{
+ dict_table_t* table;
+ ulint fold;
+
+ UT_NOT_USED(trx);
+
+	/* Look for the table id in the hash table */
+ fold = ut_fold_dulint(table_id);
+
+ HASH_SEARCH(id_hash, dict_sys->table_id_hash, fold, table,
+ ut_dulint_cmp(table->id, table_id) == 0);
+ if (table == NULL) {
+ table = dict_load_table_on_id(table_id);
+ }
+
+ if (table != NULL) {
+ table->mem_fix++;
+
+ /* lock_push(trx, table, LOCK_DICT_MEM_FIX) */
+ }
+
+ /* TODO: should get the type information from MySQL */
+
+ return(table);
+}
+
+/**************************************************************************
+Releases a table from being memoryfixed. Currently this has no relevance. */
+UNIV_INLINE
+void
+dict_table_release(
+/*===============*/
+ dict_table_t* table) /* in: table to be released */
+{
+ mutex_enter(&(dict_sys->mutex));
+
+ table->mem_fix--;
+
+ mutex_exit(&(dict_sys->mutex));
+}
+
+/**************************************************************************
+Returns an index object. */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_index(
+/*=================*/
+ /* out: index, NULL if does not exist */
+ dict_table_t* table, /* in: table */
+ char* name) /* in: index name */
+{
+ dict_index_t* index = NULL;
+
+ mutex_enter(&(dict_sys->mutex));
+
+ index = dict_table_get_first_index(table);
+
+ while (index != NULL) {
+ if (ut_strcmp(name, index->name) == 0) {
+
+ break;
+ }
+
+ index = dict_table_get_next_index(index);
+ }
+
+ mutex_exit(&(dict_sys->mutex));
+
+ return(index);
+}
+
+/***********************************************************************
+Checks if a table which is a mixed cluster member owns a record. */
+UNIV_INLINE
+ibool
+dict_is_mixed_table_rec(
+/*====================*/
+ /* out: TRUE if the record belongs to this
+ table */
+ dict_table_t* table, /* in: table in a mixed cluster */
+ rec_t* rec) /* in: user record in the clustered index */
+{
+ byte* mix_id_field;
+ ulint len;
+
+ mix_id_field = rec_get_nth_field(rec, table->mix_len, &len);
+
+ if ((len != table->mix_id_len)
+ || (0 != ut_memcmp(table->mix_id_buf, mix_id_field, len))) {
+
+ return(FALSE);
+ }
+
+ return(TRUE);
+}
diff --git a/innobase/include/dict0load.h b/innobase/include/dict0load.h
new file mode 100644
index 00000000000..d0298d8df37
--- /dev/null
+++ b/innobase/include/dict0load.h
@@ -0,0 +1,49 @@
+/******************************************************
+Loads database object definitions from dictionary tables
+to the memory cache
+
+(c) 1996 Innobase Oy
+
+Created 4/24/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dict0load_h
+#define dict0load_h
+
+#include "univ.i"
+#include "dict0types.h"
+#include "ut0byte.h"
+
+/************************************************************************
+Loads a table definition and also all its index definitions, and also
+the cluster definition, if the table is a member in a cluster. */
+
+dict_table_t*
+dict_load_table(
+/*============*/
+ /* out: table, NULL if does not exist */
+ char* name); /* in: table name */
+/***************************************************************************
+Loads a table object based on the table id. */
+
+dict_table_t*
+dict_load_table_on_id(
+/*==================*/
+ /* out: table; NULL if table does not exist */
+ dulint table_id); /* in: table id */
+/************************************************************************
+This function is called when the database is booted.
+Loads system table index definitions except for the clustered index which
+is added to the dictionary cache at booting before calling this function. */
+
+void
+dict_load_sys_table(
+/*================*/
+ dict_table_t* table); /* in: system table */
+
+
+#ifndef UNIV_NONINL
+#include "dict0load.ic"
+#endif
+
+#endif
diff --git a/innobase/include/dict0load.ic b/innobase/include/dict0load.ic
new file mode 100644
index 00000000000..1a207fbf0fd
--- /dev/null
+++ b/innobase/include/dict0load.ic
@@ -0,0 +1,9 @@
+/******************************************************
+Loads database object definitions from dictionary tables
+to the memory cache
+
+(c) 1996 Innobase Oy
+
+Created 4/24/1996 Heikki Tuuri
+*******************************************************/
+
diff --git a/innobase/include/dict0mem.h b/innobase/include/dict0mem.h
new file mode 100644
index 00000000000..42b9cb55270
--- /dev/null
+++ b/innobase/include/dict0mem.h
@@ -0,0 +1,335 @@
+/******************************************************
+Data dictionary memory object creation
+
+(c) 1996 Innobase Oy
+
+Created 1/8/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dict0mem_h
+#define dict0mem_h
+
+#include "univ.i"
+#include "dict0types.h"
+#include "data0type.h"
+#include "data0data.h"
+#include "mem0mem.h"
+#include "rem0types.h"
+#include "btr0types.h"
+#include "ut0mem.h"
+#include "ut0lst.h"
+#include "ut0rnd.h"
+#include "ut0byte.h"
+#include "sync0rw.h"
+#include "lock0types.h"
+#include "hash0hash.h"
+#include "que0types.h"
+
+/* Type flags of an index: OR'ing of the flags is allowed to define a
+combination of types */
+#define DICT_CLUSTERED 1 /* clustered index */
+#define DICT_UNIQUE 2 /* unique index */
+#define DICT_UNIVERSAL 4 /* index which can contain records from any
+ other index */
+#define DICT_IBUF 8 /* insert buffer tree */
+
+/* Flags for ordering an index field: OR'ing of the flags allowed */
+#define DICT_DESCEND 1 /* in descending order (default ascending) */
+
+/* Types for a table object */
+#define DICT_TABLE_ORDINARY 1
+#define DICT_TABLE_CLUSTER_MEMBER 2
+#define DICT_TABLE_CLUSTER 3 /* this means that the table is
+ really a cluster definition */
+
+/**************************************************************************
+Creates a table memory object. */
+
+dict_table_t*
+dict_mem_table_create(
+/*==================*/
+ /* out, own: table object */
+ char* name, /* in: table name */
+ ulint space, /* in: space where the clustered index of
+ the table is placed; this parameter is
+ ignored if the table is made a member of
+ a cluster */
+ ulint n_cols); /* in: number of columns */
+/**************************************************************************
+Creates a cluster memory object. */
+
+dict_cluster_t*
+dict_mem_cluster_create(
+/*====================*/
+ /* out, own: cluster object (where the type
+ dict_cluster_t == dict_table_t) */
+ char* name, /* in: cluster name */
+ ulint space, /* in: space where the clustered indexes
+ of the member tables are placed */
+ ulint n_cols, /* in: number of columns */
+ ulint mix_len); /* in: length of the common key prefix in the
+ cluster */
+/**************************************************************************
+Declares a non-published table as a member in a cluster. */
+
+void
+dict_mem_table_make_cluster_member(
+/*===============================*/
+ dict_table_t* table, /* in: non-published table */
+ char* cluster_name); /* in: cluster name */
+/**************************************************************************
+Adds a column definition to a table. */
+
+void
+dict_mem_table_add_col(
+/*===================*/
+ dict_table_t* table, /* in: table */
+ char* name, /* in: column name */
+ ulint mtype, /* in: main datatype */
+ ulint prtype, /* in: precise type */
+ ulint len, /* in: length */
+ ulint prec); /* in: precision */
+/**************************************************************************
+Creates an index memory object. */
+
+dict_index_t*
+dict_mem_index_create(
+/*==================*/
+ /* out, own: index object */
+ char* table_name, /* in: table name */
+ char* index_name, /* in: index name */
+ ulint space, /* in: space where the index tree is placed,
+ ignored if the index is of the clustered
+ type */
+ ulint type, /* in: DICT_UNIQUE, DICT_CLUSTERED, ... ORed */
+ ulint n_fields); /* in: number of fields */
+/**************************************************************************
+Adds a field definition to an index. NOTE: does not take a copy
+of the column name if the field is a column. The memory occupied
+by the column name may be released only after publishing the index. */
+
+void
+dict_mem_index_add_field(
+/*=====================*/
+ dict_index_t* index, /* in: index */
+ char* name, /* in: column name */
+ ulint order); /* in: order criterion; 0 means an ascending
+ order */
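+/* A minimal sketch of building a table and its clustered index as memory
+objects with the dict_mem_... functions; the names, space id 0, and the
+column types and lengths below are only illustrative:
+
+	dict_table_t*	table;
+	dict_index_t*	index;
+
+	table = dict_mem_table_create("test/t1", 0, 2);
+
+	dict_mem_table_add_col(table, "ID", DATA_INT, 0, 4, 0);
+	dict_mem_table_add_col(table, "NAME", DATA_VARCHAR, 0, 100, 0);
+
+	index = dict_mem_index_create("test/t1", "PRIMARY", 0,
+					DICT_CLUSTERED | DICT_UNIQUE, 1);
+	dict_mem_index_add_field(index, "ID", 0);
+*/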
+/**************************************************************************
+Frees an index memory object. */
+
+void
+dict_mem_index_free(
+/*================*/
+ dict_index_t* index); /* in: index */
+/**************************************************************************
+Creates a procedure memory object. */
+
+dict_proc_t*
+dict_mem_procedure_create(
+/*======================*/
+ /* out, own: procedure object */
+ char* name, /* in: procedure name */
+ char* sql_string, /* in: procedure definition as an SQL
+ string */
+ que_fork_t* graph); /* in: parsed procedure graph */
+
+
+/* Data structure for a column in a table */
+struct dict_col_struct{
+ hash_node_t hash; /* hash chain node */
+ ulint ind; /* table column position (they are numbered
+ starting from 0) */
+ ulint clust_pos;/* position of the column in the
+ clustered index */
+	ulint		ord_part;/* count of how many times this column
+				appears in the ordering fields of an index */
+ char* name; /* name */
+ dtype_t type; /* data type */
+ dict_table_t* table; /* back pointer to table of this column */
+ ulint aux; /* this is used as an auxiliary variable
+ in some of the functions below */
+};
+
+/* Data structure for a field in an index */
+struct dict_field_struct{
+ dict_col_t* col; /* pointer to the table column */
+ char* name; /* name of the column */
+ ulint order; /* flags for ordering this field:
+ DICT_DESCEND, ... */
+};
+
+/* Data structure for an index tree */
+struct dict_tree_struct{
+ ulint type; /* tree type */
+ dulint id; /* id of the index stored in the tree, in the
+ case of a mixed index, the id of the clustered
+ index of the cluster table */
+ ulint space; /* space of index tree */
+ ulint page; /* index tree root page number */
+ byte pad[64];/* Padding to prevent other memory hotspots on
+ the same memory cache line */
+ rw_lock_t lock; /* read-write lock protecting the upper levels
+ of the index tree */
+ ulint mem_fix;/* count of how many times this tree
+ struct has been memoryfixed (by mini-
+ transactions wanting to access the index
+ tree) */
+ UT_LIST_BASE_NODE_T(dict_index_t)
+ tree_indexes; /* list of indexes stored in the
+ index tree: if the tree is not of the
+ mixed type there is only one index in
+ the list; if the tree is of the mixed
+ type, the first index in the list is the
+ index of the cluster which owns the tree */
+ ulint magic_n;/* magic number */
+};
+
+#define DICT_TREE_MAGIC_N 7545676
+
+/* Data structure for an index */
+struct dict_index_struct{
+ dulint id; /* id of the index */
+ mem_heap_t* heap; /* memory heap */
+ ulint type; /* index type */
+ char* name; /* index name */
+ char* table_name; /* table name */
+ dict_table_t* table; /* back pointer to table */
+ ulint space; /* space where the index tree is placed */
+ ulint page_no;/* page number of the index tree root */
+	ulint		trx_id_offset;/* position of the trx id column
+ in a clustered index record, if the fields
+ before it are known to be of a fixed size,
+ 0 otherwise */
+ ulint n_user_defined_cols;
+ /* number of columns the user defined to
+ be in the index: in the internal
+ representation we add more columns */
+ ulint n_uniq; /* number of fields from the beginning
+ which are enough to determine an index
+ entry uniquely */
+ ulint n_def; /* number of fields defined so far */
+ ulint n_fields;/* number of fields in the index */
+ dict_field_t* fields; /* array of field descriptions */
+ UT_LIST_NODE_T(dict_index_t)
+ indexes;/* list of indexes of the table */
+ dict_tree_t* tree; /* index tree struct */
+ UT_LIST_NODE_T(dict_index_t)
+ tree_indexes; /* list of indexes of the same index
+ tree */
+ ibool cached; /* TRUE if the index object is in the
+ dictionary cache */
+ btr_search_t* search_info; /* info used in optimistic searches */
+ /*----------------------*/
+ ulint stat_n_diff_key_vals;
+ /* approximate number of different key values
+ for this index; we periodically calculate
+ new estimates */
+ ulint stat_index_size;
+ /* approximate index size in database pages */
+ ulint magic_n;/* magic number */
+};
+
+#define DICT_INDEX_MAGIC_N 76789786
+
+/* Data structure for a database table */
+struct dict_table_struct{
+ dulint id; /* id of the table or cluster */
+ ulint type; /* DICT_TABLE_ORDINARY, ... */
+ mem_heap_t* heap; /* memory heap */
+ char* name; /* table name */
+ ulint space; /* space where the clustered index of the
+ table is placed */
+ hash_node_t name_hash; /* hash chain node */
+ hash_node_t id_hash; /* hash chain node */
+ ulint n_def; /* number of columns defined so far */
+ ulint n_cols; /* number of columns */
+ dict_col_t* cols; /* array of column descriptions */
+ UT_LIST_BASE_NODE_T(dict_index_t)
+ indexes; /* list of indexes of the table */
+ UT_LIST_NODE_T(dict_table_t)
+ table_LRU; /* node of the LRU list of tables */
+ ulint mem_fix;/* count of how many times the table
+				and its indexes have been fixed in memory;
+ currently NOT used */
+ ibool cached; /* TRUE if the table object has been added
+ to the dictionary cache */
+ UT_LIST_BASE_NODE_T(lock_t)
+ locks; /* list of locks on the table */
+ /*----------------------*/
+ dulint mix_id; /* if the table is a member in a cluster,
+ this is its mix id */
+ ulint mix_len;/* if the table is a cluster or a member
+				this is the common key prefix length */
+ ulint mix_id_len;/* mix id length in a compressed form */
+ byte mix_id_buf[12];
+ /* mix id of a mixed table written in
+ a compressed form */
+ char* cluster_name; /* if the table is a member in a
+ cluster, this is the name of the cluster */
+ /*----------------------*/
+ ibool does_not_fit_in_memory;
+				/* this field is used in simulations to mark
+				tables which are so big that disk should be
+ accessed: disk access is simulated by
+ putting the thread to sleep for a while;
+ NOTE that this flag is not stored to the data
+ dictionary on disk, and the database will
+ forget about value TRUE if it has to reload
+ the table definition from disk */
+ /*----------------------*/
+ ulint stat_n_rows;
+ /* approximate number of rows in the table;
+ we periodically calculate new estimates */
+ ulint stat_clustered_index_size;
+ /* approximate clustered index size in
+ database pages */
+ ulint stat_sum_of_other_index_sizes;
+				/* approximate sum of the sizes of the other
+				indexes, in database pages */
+ ulint stat_last_estimate_counter;
+				/* when the estimates were last
+ calculated; a value (ulint)-1 denotes that
+ they have not yet been calculated for this
+ table (or the counter has wrapped over) */
+ ulint stat_modif_counter;
+ /* when a row is inserted, updated, or deleted,
+ we add the row length to this number; we
+ calculate new estimates for the stat_...
+ values for the table and the indexes at an
+ interval of DICT_STAT_CALCULATE_INTERVAL,
+ but for small tables more often, also
+ when the estimate operation is called
+ for MySQL SHOW TABLE STATUS; this counter
+ is not protected by any latch, because this
+ is only used for heuristics */
+ ulint magic_n;/* magic number */
+};
+#define DICT_TABLE_MAGIC_N 76333786
+
+/* Statistics are calculated at least with this interval; see the struct
+above */
+#define DICT_STAT_CALCULATE_INTERVAL (UNIV_PAGE_SIZE * 8)
+
+/* Data structure for a stored procedure */
+struct dict_proc_struct{
+ mem_heap_t* heap; /* memory heap */
+ char* name; /* procedure name */
+ char* sql_string;
+ /* procedure definition as an SQL string:
+ we can produce more parsed instances of the
+ procedure by parsing this string */
+ hash_node_t name_hash;
+ /* hash chain node */
+ UT_LIST_BASE_NODE_T(que_fork_t) graphs;
+ /* list of parsed instances of the procedure:
+ there may be many of them, and they are
+ recycled */
+ ulint mem_fix;/* count of how many times this struct
+ has been fixed in memory */
+};
+
+#ifndef UNIV_NONINL
+#include "dict0mem.ic"
+#endif
+
+#endif
diff --git a/innobase/include/dict0mem.ic b/innobase/include/dict0mem.ic
new file mode 100644
index 00000000000..9bcefc2a51f
--- /dev/null
+++ b/innobase/include/dict0mem.ic
@@ -0,0 +1,9 @@
+/**********************************************************************
+Data dictionary memory object creation
+
+(c) 1996 Innobase Oy
+
+Created 1/8/1996 Heikki Tuuri
+***********************************************************************/
+
+
diff --git a/innobase/include/dict0types.h b/innobase/include/dict0types.h
new file mode 100644
index 00000000000..fe1bad45063
--- /dev/null
+++ b/innobase/include/dict0types.h
@@ -0,0 +1,28 @@
+/******************************************************
+Data dictionary global types
+
+(c) 1996 Innobase Oy
+
+Created 1/8/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dict0types_h
+#define dict0types_h
+
+typedef struct dict_sys_struct dict_sys_t;
+typedef struct dict_col_struct dict_col_t;
+typedef struct dict_field_struct dict_field_t;
+typedef struct dict_index_struct dict_index_t;
+typedef struct dict_tree_struct dict_tree_t;
+typedef struct dict_table_struct dict_table_t;
+typedef struct dict_proc_struct dict_proc_t;
+
+/* A cluster object is a table object with the type field set to
+DICT_TABLE_CLUSTER */
+
+typedef dict_table_t dict_cluster_t;
+
+typedef struct ind_node_struct ind_node_t;
+typedef struct tab_node_struct tab_node_t;
+
+#endif
diff --git a/innobase/include/dyn0dyn.h b/innobase/include/dyn0dyn.h
new file mode 100644
index 00000000000..07ad8539b38
--- /dev/null
+++ b/innobase/include/dyn0dyn.h
@@ -0,0 +1,172 @@
+/******************************************************
+The dynamically allocated array
+
+(c) 1996 Innobase Oy
+
+Created 2/5/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dyn0dyn_h
+#define dyn0dyn_h
+
+#include "univ.i"
+#include "ut0lst.h"
+#include "mem0mem.h"
+
+typedef struct dyn_block_struct dyn_block_t;
+typedef dyn_block_t dyn_array_t;
+
+
+/* Initial 'payload' size in bytes in a dynamic array block */
+#define DYN_ARRAY_DATA_SIZE 1024
+
+/*************************************************************************
+Initializes a dynamic array. */
+UNIV_INLINE
+dyn_array_t*
+dyn_array_create(
+/*=============*/
+ /* out: initialized dyn array */
+ dyn_array_t* arr); /* in: pointer to a memory buffer of
+ size sizeof(dyn_array_t) */
+/****************************************************************
+Frees a dynamic array. */
+UNIV_INLINE
+void
+dyn_array_free(
+/*===========*/
+ dyn_array_t* arr); /* in: dyn array */
+/*************************************************************************
+Makes room on top of a dyn array and returns a pointer to a buffer in it.
+After copying the elements, the caller must close the buffer using
+dyn_array_close. */
+UNIV_INLINE
+byte*
+dyn_array_open(
+/*===========*/
+ /* out: pointer to the buffer */
+ dyn_array_t* arr, /* in: dynamic array */
+ ulint size); /* in: size in bytes of the buffer */
+/*************************************************************************
+Closes the buffer returned by dyn_array_open. */
+UNIV_INLINE
+void
+dyn_array_close(
+/*============*/
+ dyn_array_t* arr, /* in: dynamic array */
+ byte* ptr); /* in: buffer space from ptr up was not used */
+/*************************************************************************
+Makes room on top of a dyn array and returns a pointer to
+the added element. The caller must copy the element to
+the pointer returned. */
+UNIV_INLINE
+void*
+dyn_array_push(
+/*===========*/
+ /* out: pointer to the element */
+ dyn_array_t* arr, /* in: dynamic array */
+ ulint size); /* in: size in bytes of the element */
+/****************************************************************
+Returns pointer to an element in dyn array. */
+UNIV_INLINE
+void*
+dyn_array_get_element(
+/*==================*/
+ /* out: pointer to element */
+ dyn_array_t* arr, /* in: dyn array */
+ ulint pos); /* in: position of element as bytes
+ from array start */
+/****************************************************************
+Returns the size of stored data in a dyn array. */
+UNIV_INLINE
+ulint
+dyn_array_get_data_size(
+/*====================*/
+ /* out: data size in bytes */
+ dyn_array_t* arr); /* in: dyn array */
+/****************************************************************
+Gets the first block in a dyn array. */
+UNIV_INLINE
+dyn_block_t*
+dyn_array_get_first_block(
+/*======================*/
+ dyn_array_t* arr); /* in: dyn array */
+/****************************************************************
+Gets the last block in a dyn array. */
+UNIV_INLINE
+dyn_block_t*
+dyn_array_get_last_block(
+/*=====================*/
+ dyn_array_t* arr); /* in: dyn array */
+/************************************************************************
+Gets the next block in a dyn array. */
+UNIV_INLINE
+dyn_block_t*
+dyn_array_get_next_block(
+/*=====================*/
+ /* out: pointer to next, NULL if end of list */
+ dyn_array_t* arr, /* in: dyn array */
+ dyn_block_t* block); /* in: dyn array block */
+/************************************************************************
+Gets the number of used bytes in a dyn array block. */
+UNIV_INLINE
+ulint
+dyn_block_get_used(
+/*===============*/
+ /* out: number of bytes used */
+ dyn_block_t* block); /* in: dyn array block */
+/************************************************************************
+Gets pointer to the start of data in a dyn array block. */
+UNIV_INLINE
+byte*
+dyn_block_get_data(
+/*===============*/
+ /* out: pointer to data */
+ dyn_block_t* block); /* in: dyn array block */
+/************************************************************************
+Gets the next block in a dyn array. */
+UNIV_INLINE
+dyn_block_t*
+dyn_block_get_next(
+/*===============*/
+ /* out: pointer to next, NULL if end of list */
+ dyn_block_t* block); /* in: dyn array block */
+/************************************************************
+Pushes n bytes to a dyn array. */
+UNIV_INLINE
+void
+dyn_push_string(
+/*============*/
+ dyn_array_t* arr, /* in: dyn array */
+ byte* str, /* in: string to write */
+ ulint len); /* in: string length */
+
+/*#################################################################*/
+
+/* NOTE! Do not use the fields of the struct directly: the definition
+appears here only for the compiler to know its size! */
+struct dyn_block_struct{
+ mem_heap_t* heap; /* in the first block this is != NULL
+ if dynamic allocation has been needed */
+ ulint used; /* number of data bytes used in this block */
+ byte data[DYN_ARRAY_DATA_SIZE];
+ /* storage for array elements */
+ UT_LIST_BASE_NODE_T(dyn_block_t) base;
+ /* linear list of dyn blocks: this node is
+ used only in the first block */
+ UT_LIST_NODE_T(dyn_block_t) list;
+ /* linear list node: used in all blocks */
+#ifdef UNIV_DEBUG
+ ulint buf_end;/* only in the debug version: if dyn array is
+ opened, this is the buffer end offset, else
+ this is 0 */
+ ulint magic_n;
+#endif
+};
+
+
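+/* An illustrative usage sketch (the function name is hypothetical): the
+first block of the array lives in a caller-provided buffer, and further
+blocks are allocated from a memory heap only when the data outgrows it. */
+UNIV_INLINE
+void
+dyn_array_usage_example(void)
+/*=========================*/
+{
+	dyn_array_t	arr;
+	ulint*		elem;
+
+	dyn_array_create(&arr);
+
+	/* Reserve room for one ulint and fill it in through the returned
+	pointer */
+	elem = (ulint*) dyn_array_push(&arr, sizeof(ulint));
+	*elem = 42;
+
+	/* Append a byte string; it may get split over several blocks */
+	dyn_push_string(&arr, (byte*)"abc", 3);
+
+	ut_a(dyn_array_get_data_size(&arr) == sizeof(ulint) + 3);
+
+	/* Elements are addressed by their byte offset from the array start */
+	ut_a(*((ulint*) dyn_array_get_element(&arr, 0)) == 42);
+
+	dyn_array_free(&arr);	/* frees the heap blocks, if any */
+}
+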
+#ifndef UNIV_NONINL
+#include "dyn0dyn.ic"
+#endif
+
+#endif
diff --git a/innobase/include/dyn0dyn.ic b/innobase/include/dyn0dyn.ic
new file mode 100644
index 00000000000..dc004efbb8b
--- /dev/null
+++ b/innobase/include/dyn0dyn.ic
@@ -0,0 +1,345 @@
+/******************************************************
+The dynamically allocated array
+
+(c) 1996 Innobase Oy
+
+Created 2/5/1996 Heikki Tuuri
+*******************************************************/
+
+#define DYN_BLOCK_MAGIC_N 375767
+#define DYN_BLOCK_FULL_FLAG 0x1000000
+
+/****************************************************************
+Adds a new block to a dyn array. */
+
+dyn_block_t*
+dyn_array_add_block(
+/*================*/
+ /* out: created block */
+ dyn_array_t* arr); /* in: dyn array */
+
+
+/****************************************************************
+Gets the first block in a dyn array. */
+UNIV_INLINE
+dyn_block_t*
+dyn_array_get_first_block(
+/*======================*/
+ dyn_array_t* arr) /* in: dyn array */
+{
+ return(arr);
+}
+
+/****************************************************************
+Gets the last block in a dyn array. */
+UNIV_INLINE
+dyn_block_t*
+dyn_array_get_last_block(
+/*=====================*/
+ dyn_array_t* arr) /* in: dyn array */
+{
+ if (arr->heap == NULL) {
+
+ return(arr);
+ }
+
+ return(UT_LIST_GET_LAST(arr->base));
+}
+
+/************************************************************************
+Gets the next block in a dyn array. */
+UNIV_INLINE
+dyn_block_t*
+dyn_array_get_next_block(
+/*=====================*/
+ /* out: pointer to next, NULL if end of list */
+ dyn_array_t* arr, /* in: dyn array */
+ dyn_block_t* block) /* in: dyn array block */
+{
+ ut_ad(arr && block);
+
+ if (arr->heap == NULL) {
+ ut_ad(arr == block);
+
+ return(NULL);
+ }
+
+ return(UT_LIST_GET_NEXT(list, block));
+}
+
+/************************************************************************
+Gets the number of used bytes in a dyn array block. */
+UNIV_INLINE
+ulint
+dyn_block_get_used(
+/*===============*/
+ /* out: number of bytes used */
+ dyn_block_t* block) /* in: dyn array block */
+{
+ ut_ad(block);
+
+ return((block->used) & ~DYN_BLOCK_FULL_FLAG);
+}
+
+/************************************************************************
+Gets pointer to the start of data in a dyn array block. */
+UNIV_INLINE
+byte*
+dyn_block_get_data(
+/*===============*/
+ /* out: pointer to data */
+ dyn_block_t* block) /* in: dyn array block */
+{
+ ut_ad(block);
+
+ return(block->data);
+}
+
+/*************************************************************************
+Initializes a dynamic array. */
+UNIV_INLINE
+dyn_array_t*
+dyn_array_create(
+/*=============*/
+ /* out: initialized dyn array */
+ dyn_array_t* arr) /* in: pointer to a memory buffer of
+ size sizeof(dyn_array_t) */
+{
+ ut_ad(arr);
+ ut_ad(DYN_ARRAY_DATA_SIZE < DYN_BLOCK_FULL_FLAG);
+
+ arr->heap = NULL;
+ arr->used = 0;
+
+#ifdef UNIV_DEBUG
+ arr->buf_end = 0;
+ arr->magic_n = DYN_BLOCK_MAGIC_N;
+#endif
+ return(arr);
+}
+
+/****************************************************************
+Frees a dynamic array. */
+UNIV_INLINE
+void
+dyn_array_free(
+/*===========*/
+ dyn_array_t* arr) /* in: dyn array */
+{
+ if (arr->heap != NULL) {
+ mem_heap_free(arr->heap);
+ }
+
+#ifdef UNIV_DEBUG
+ arr->magic_n = 0;
+#endif
+}
+
+/*************************************************************************
+Makes room on top of a dyn array and returns a pointer to the added element.
+The caller must copy the element to the pointer returned. */
+UNIV_INLINE
+void*
+dyn_array_push(
+/*===========*/
+ /* out: pointer to the element */
+ dyn_array_t* arr, /* in: dynamic array */
+ ulint size) /* in: size in bytes of the element */
+{
+ dyn_block_t* block;
+ ulint used;
+
+ ut_ad(arr);
+ ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
+ ut_ad(size <= DYN_ARRAY_DATA_SIZE);
+ ut_ad(size);
+
+ block = arr;
+ used = block->used;
+
+ if (used + size > DYN_ARRAY_DATA_SIZE) {
+ /* Get the last array block */
+
+ block = dyn_array_get_last_block(arr);
+ used = block->used;
+
+ if (used + size > DYN_ARRAY_DATA_SIZE) {
+ block = dyn_array_add_block(arr);
+ used = block->used;
+ }
+ }
+
+ block->used = used + size;
+ ut_ad(block->used <= DYN_ARRAY_DATA_SIZE);
+
+ return((block->data) + used);
+}
+
+/*************************************************************************
+Makes room on top of a dyn array and returns a pointer to a buffer in it.
+After copying the elements, the caller must close the buffer using
+dyn_array_close. */
+UNIV_INLINE
+byte*
+dyn_array_open(
+/*===========*/
+ /* out: pointer to the buffer */
+ dyn_array_t* arr, /* in: dynamic array */
+ ulint size) /* in: size in bytes of the buffer */
+{
+ dyn_block_t* block;
+ ulint used;
+
+ ut_ad(arr);
+ ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
+ ut_ad(size <= DYN_ARRAY_DATA_SIZE);
+ ut_ad(size);
+
+ block = arr;
+ used = block->used;
+
+ if (used + size > DYN_ARRAY_DATA_SIZE) {
+ /* Get the last array block */
+
+ block = dyn_array_get_last_block(arr);
+ used = block->used;
+
+ if (used + size > DYN_ARRAY_DATA_SIZE) {
+ block = dyn_array_add_block(arr);
+ used = block->used;
+ }
+ }
+
+ ut_ad(block->used <= DYN_ARRAY_DATA_SIZE);
+#ifdef UNIV_DEBUG
+ ut_ad(arr->buf_end == 0);
+
+ arr->buf_end = used + size;
+#endif
+ return((block->data) + used);
+}
+
+/*************************************************************************
+Closes the buffer returned by dyn_array_open. */
+UNIV_INLINE
+void
+dyn_array_close(
+/*============*/
+ dyn_array_t* arr, /* in: dynamic array */
+ byte* ptr) /* in: buffer space from ptr up was not used */
+{
+ dyn_block_t* block;
+
+ ut_ad(arr);
+ ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
+
+ block = dyn_array_get_last_block(arr);
+
+ ut_ad(arr->buf_end + block->data >= ptr);
+
+ block->used = ptr - block->data;
+
+ ut_ad(block->used <= DYN_ARRAY_DATA_SIZE);
+
+#ifdef UNIV_DEBUG
+ arr->buf_end = 0;
+#endif
+}
+
+/****************************************************************
+Returns pointer to an element in dyn array. */
+UNIV_INLINE
+void*
+dyn_array_get_element(
+/*==================*/
+ /* out: pointer to element */
+ dyn_array_t* arr, /* in: dyn array */
+ ulint pos) /* in: position of element as bytes
+ from array start */
+{
+ dyn_block_t* block;
+ ulint used;
+
+ ut_ad(arr);
+ ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
+
+ /* Get the first array block */
+ block = dyn_array_get_first_block(arr);
+
+ if (arr->heap != NULL) {
+ used = dyn_block_get_used(block);
+
+ while (pos >= used) {
+ pos -= used;
+ block = UT_LIST_GET_NEXT(list, block);
+ ut_ad(block);
+
+ used = dyn_block_get_used(block);
+ }
+ }
+
+ ut_ad(block);
+ ut_ad(dyn_block_get_used(block) >= pos);
+
+ return(block->data + pos);
+}
+
+/****************************************************************
+Returns the size of stored data in a dyn array. */
+UNIV_INLINE
+ulint
+dyn_array_get_data_size(
+/*====================*/
+ /* out: data size in bytes */
+ dyn_array_t* arr) /* in: dyn array */
+{
+ dyn_block_t* block;
+ ulint sum = 0;
+
+ ut_ad(arr);
+ ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
+
+ if (arr->heap == NULL) {
+
+ return(arr->used);
+ }
+
+ /* Get the first array block */
+ block = dyn_array_get_first_block(arr);
+
+ while (block != NULL) {
+ sum += dyn_block_get_used(block);
+ block = dyn_array_get_next_block(arr, block);
+ }
+
+ return(sum);
+}
+
+/************************************************************
+Pushes n bytes to a dyn array. */
+UNIV_INLINE
+void
+dyn_push_string(
+/*============*/
+ dyn_array_t* arr, /* in: dyn array */
+ byte* str, /* in: string to write */
+ ulint len) /* in: string length */
+{
+ byte* ptr;
+ ulint n_copied;
+
+ while (len > 0) {
+ if (len > DYN_ARRAY_DATA_SIZE) {
+ n_copied = DYN_ARRAY_DATA_SIZE;
+ } else {
+ n_copied = len;
+ }
+
+ ptr = (byte*) dyn_array_push(arr, n_copied);
+
+ ut_memcpy(ptr, str, n_copied);
+
+ str += n_copied;
+ len -= n_copied;
+ }
+}
diff --git a/innobase/include/eval0eval.h b/innobase/include/eval0eval.h
new file mode 100644
index 00000000000..6561f0c8ae7
--- /dev/null
+++ b/innobase/include/eval0eval.h
@@ -0,0 +1,97 @@
+/******************************************************
+SQL evaluator: evaluates simple data structures, like expressions, in
+a query graph
+
+(c) 1997 Innobase Oy
+
+Created 12/29/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef eval0eval_h
+#define eval0eval_h
+
+#include "univ.i"
+#include "que0types.h"
+#include "pars0sym.h"
+#include "pars0pars.h"
+
+/*********************************************************************
+Free the buffer from global dynamic memory for a value of a que_node,
+if it has been allocated in eval_node_alloc_val_buf. The freeing for pushed
+column values is done in sel_col_prefetch_buf_free. */
+
+void
+eval_node_free_val_buf(
+/*===================*/
+ que_node_t* node); /* in: query graph node */
+/*********************************************************************
+Evaluates a symbol table symbol. */
+UNIV_INLINE
+void
+eval_sym(
+/*=====*/
+ sym_node_t* sym_node); /* in: symbol table node */
+/*********************************************************************
+Evaluates an expression. */
+UNIV_INLINE
+void
+eval_exp(
+/*=====*/
+ que_node_t* exp_node); /* in: expression */
+/*********************************************************************
+Sets an integer value as the value of an expression node. */
+UNIV_INLINE
+void
+eval_node_set_int_val(
+/*==================*/
+ que_node_t* node, /* in: expression node */
+ lint val); /* in: value to set */
+/*********************************************************************
+Gets an integer value from an expression node. */
+UNIV_INLINE
+lint
+eval_node_get_int_val(
+/*==================*/
+ /* out: integer value */
+ que_node_t* node); /* in: expression node */
+/*********************************************************************
+Copies a binary string value as the value of a query graph node. Allocates a
+new buffer if necessary. */
+UNIV_INLINE
+void
+eval_node_copy_and_alloc_val(
+/*=========================*/
+ que_node_t* node, /* in: query graph node */
+ byte* str, /* in: binary string */
+ ulint len); /* in: string length or UNIV_SQL_NULL */
+/*********************************************************************
+Copies a query node value to another node. */
+UNIV_INLINE
+void
+eval_node_copy_val(
+/*===============*/
+ que_node_t* node1, /* in: node to copy to */
+ que_node_t* node2); /* in: node to copy from */
+/*********************************************************************
+Gets an iboolean value from a query node. */
+UNIV_INLINE
+ibool
+eval_node_get_ibool_val(
+/*===================*/
+ /* out: iboolean value */
+ que_node_t* node); /* in: query graph node */
+/*********************************************************************
+Evaluates a comparison node. */
+
+ibool
+eval_cmp(
+/*=====*/
+ /* out: the result of the comparison */
+ func_node_t* cmp_node); /* in: comparison node */
+
+
+#ifndef UNIV_NONINL
+#include "eval0eval.ic"
+#endif
+
+#endif
diff --git a/innobase/include/eval0eval.ic b/innobase/include/eval0eval.ic
new file mode 100644
index 00000000000..2530c869206
--- /dev/null
+++ b/innobase/include/eval0eval.ic
@@ -0,0 +1,236 @@
+/******************************************************
+SQL evaluator: evaluates simple data structures, like expressions, in
+a query graph
+
+(c) 1997 Innobase Oy
+
+Created 12/29/1997 Heikki Tuuri
+*******************************************************/
+
+#include "que0que.h"
+#include "rem0cmp.h"
+#include "pars0grm.h"
+
+/*********************************************************************
+Evaluates a function node. */
+
+void
+eval_func(
+/*======*/
+ func_node_t* func_node); /* in: function node */
+/*********************************************************************
+Allocate a buffer from global dynamic memory for a value of a que_node.
+NOTE that this memory must be explicitly freed when the query graph is
+freed. If the node already has allocated buffer, that buffer is freed
+here. NOTE that this is the only function where dynamic memory should be
+allocated for a query node val field. */
+
+byte*
+eval_node_alloc_val_buf(
+/*====================*/
+ /* out: pointer to allocated buffer */
+ que_node_t* node, /* in: query graph node; sets the val field
+ data field to point to the new buffer, and
+ len field equal to size */
+ ulint size); /* in: buffer size */
+
+
+/*********************************************************************
+Allocates a new buffer if needed. */
+UNIV_INLINE
+byte*
+eval_node_ensure_val_buf(
+/*=====================*/
+ /* out: pointer to buffer */
+ que_node_t* node, /* in: query graph node; sets the val field
+ data field to point to the new buffer, and
+ len field equal to size */
+ ulint size) /* in: buffer size */
+{
+ dfield_t* dfield;
+ byte* data;
+
+ dfield = que_node_get_val(node);
+ dfield_set_len(dfield, size);
+
+ data = dfield_get_data(dfield);
+
+ if (!data || que_node_get_val_buf_size(node) < size) {
+
+ data = eval_node_alloc_val_buf(node, size);
+ }
+
+ return(data);
+}
+
+/*********************************************************************
+Evaluates a symbol table symbol. */
+UNIV_INLINE
+void
+eval_sym(
+/*=====*/
+ sym_node_t* sym_node) /* in: symbol table node */
+{
+
+ ut_ad(que_node_get_type(sym_node) == QUE_NODE_SYMBOL);
+
+ if (sym_node->indirection) {
+ /* The symbol table node is an alias for a variable or a
+ column */
+
+ dfield_copy_data(que_node_get_val(sym_node),
+ que_node_get_val(sym_node->indirection));
+ }
+}
+
+/*********************************************************************
+Evaluates an expression. */
+UNIV_INLINE
+void
+eval_exp(
+/*=====*/
+ que_node_t* exp_node) /* in: expression */
+{
+ if (que_node_get_type(exp_node) == QUE_NODE_SYMBOL) {
+
+ eval_sym((sym_node_t*)exp_node);
+
+ return;
+ }
+
+ eval_func(exp_node);
+}
+
+/*********************************************************************
+Sets an integer value as the value of an expression node. */
+UNIV_INLINE
+void
+eval_node_set_int_val(
+/*==================*/
+ que_node_t* node, /* in: expression node */
+ lint val) /* in: value to set */
+{
+ dfield_t* dfield;
+ byte* data;
+
+ dfield = que_node_get_val(node);
+
+ data = dfield_get_data(dfield);
+
+ if (data == NULL) {
+ data = eval_node_alloc_val_buf(node, 4);
+ }
+
+ ut_ad(dfield_get_len(dfield) == 4);
+
+ mach_write_to_4(data, (ulint)val);
+}
+
+/*********************************************************************
+Gets an integer value from an expression node. The value must not be an
+SQL null value. */
+UNIV_INLINE
+lint
+eval_node_get_int_val(
+/*==================*/
+ /* out: integer value */
+ que_node_t* node) /* in: expression node */
+{
+ dfield_t* dfield;
+
+ dfield = que_node_get_val(node);
+
+ ut_ad(dfield_get_len(dfield) == 4);
+
+ return((int)mach_read_from_4(dfield_get_data(dfield)));
+}
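+
+/* An illustrative sketch of the 4-byte round trip that the two functions
+above rely on (the function name is hypothetical; mach_write_to_4 and
+mach_read_from_4 are the same helpers used above): */
+UNIV_INLINE
+void
+eval_int_val_roundtrip_example(void)
+/*================================*/
+{
+	byte	buf[4];
+	lint	val = -5;
+
+	/* eval_node_set_int_val stores the value as 4 bytes */
+	mach_write_to_4(buf, (ulint)val);
+
+	/* eval_node_get_int_val reads the bytes back and casts to a signed
+	integer, so small negative values survive the round trip on the
+	usual two's-complement platforms */
+	ut_a((lint)(int)mach_read_from_4(buf) == val);
+}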
+
+/*********************************************************************
+Gets an iboolean value from a query node. */
+UNIV_INLINE
+ibool
+eval_node_get_ibool_val(
+/*===================*/
+ /* out: iboolean value */
+ que_node_t* node) /* in: query graph node */
+{
+ dfield_t* dfield;
+ byte* data;
+
+ dfield = que_node_get_val(node);
+
+ data = dfield_get_data(dfield);
+
+ ut_ad(data != NULL);
+
+ return(mach_read_from_1(data));
+}
+
+/*********************************************************************
+Sets an iboolean value as the value of a function node. */
+UNIV_INLINE
+void
+eval_node_set_ibool_val(
+/*===================*/
+ func_node_t* func_node, /* in: function node */
+ ibool val) /* in: value to set */
+{
+ dfield_t* dfield;
+ byte* data;
+
+ dfield = que_node_get_val(func_node);
+
+ data = dfield_get_data(dfield);
+
+ if (data == NULL) {
+ /* Allocate 1 byte to hold the value */
+
+ data = eval_node_alloc_val_buf(func_node, 1);
+ }
+
+ ut_ad(dfield_get_len(dfield) == 1);
+
+ mach_write_to_1(data, val);
+}
+
+/*********************************************************************
+Copies a binary string value as the value of a query graph node. Allocates a
+new buffer if necessary. */
+UNIV_INLINE
+void
+eval_node_copy_and_alloc_val(
+/*=========================*/
+ que_node_t* node, /* in: query graph node */
+ byte* str, /* in: binary string */
+ ulint len) /* in: string length or UNIV_SQL_NULL */
+{
+ byte* data;
+
+ ut_ad(UNIV_SQL_NULL > ULINT_MAX);
+
+ if (len == UNIV_SQL_NULL) {
+ dfield_set_len(que_node_get_val(node), len);
+
+ return;
+ }
+
+ data = eval_node_ensure_val_buf(node, len);
+
+ ut_memcpy(data, str, len);
+}
+
+/*********************************************************************
+Copies a query node value to another node. */
+UNIV_INLINE
+void
+eval_node_copy_val(
+/*===============*/
+ que_node_t* node1, /* in: node to copy to */
+ que_node_t* node2) /* in: node to copy from */
+{
+ dfield_t* dfield2;
+
+ dfield2 = que_node_get_val(node2);
+
+ eval_node_copy_and_alloc_val(node1, dfield_get_data(dfield2),
+ dfield_get_len(dfield2));
+}
diff --git a/innobase/include/eval0proc.h b/innobase/include/eval0proc.h
new file mode 100644
index 00000000000..5d685ad9076
--- /dev/null
+++ b/innobase/include/eval0proc.h
@@ -0,0 +1,79 @@
+/******************************************************
+Executes SQL stored procedures and their control structures
+
+(c) 1998 Innobase Oy
+
+Created 1/20/1998 Heikki Tuuri
+*******************************************************/
+
+#ifndef eval0proc_h
+#define eval0proc_h
+
+#include "univ.i"
+#include "que0types.h"
+#include "pars0sym.h"
+#include "pars0pars.h"
+
+/**************************************************************************
+Performs an execution step of a procedure node. */
+UNIV_INLINE
+que_thr_t*
+proc_step(
+/*======*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+/**************************************************************************
+Performs an execution step of an if-statement node. */
+
+que_thr_t*
+if_step(
+/*====*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+/**************************************************************************
+Performs an execution step of a while-statement node. */
+
+que_thr_t*
+while_step(
+/*=======*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+/**************************************************************************
+Performs an execution step of a for-loop node. */
+
+que_thr_t*
+for_step(
+/*=====*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+/**************************************************************************
+Performs an execution step of an assignment statement node. */
+
+que_thr_t*
+assign_step(
+/*========*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+/**************************************************************************
+Performs an execution step of a procedure call node. */
+UNIV_INLINE
+que_thr_t*
+proc_eval_step(
+/*===========*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+/**************************************************************************
+Performs an execution step of a return-statement node. */
+
+que_thr_t*
+return_step(
+/*========*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+
+
+#ifndef UNIV_NONINL
+#include "eval0proc.ic"
+#endif
+
+#endif
diff --git a/innobase/include/eval0proc.ic b/innobase/include/eval0proc.ic
new file mode 100644
index 00000000000..0d7ecb6d1dc
--- /dev/null
+++ b/innobase/include/eval0proc.ic
@@ -0,0 +1,71 @@
+/******************************************************
+Executes SQL stored procedures and their control structures
+
+(c) 1998 Innobase Oy
+
+Created 1/20/1998 Heikki Tuuri
+*******************************************************/
+
+#include "pars0pars.h"
+#include "que0que.h"
+#include "eval0eval.h"
+
+/**************************************************************************
+Performs an execution step of a procedure node. */
+UNIV_INLINE
+que_thr_t*
+proc_step(
+/*======*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr) /* in: query thread */
+{
+ proc_node_t* node;
+
+ ut_ad(thr);
+
+ node = thr->run_node;
+ ut_ad(que_node_get_type(node) == QUE_NODE_PROC);
+
+ if (thr->prev_node == que_node_get_parent(node)) {
+ /* Start execution from the first statement in the statement
+ list */
+
+ thr->run_node = node->stat_list;
+ } else {
+ /* Move to the next statement */
+ ut_ad(que_node_get_next(thr->prev_node) == NULL);
+
+ thr->run_node = NULL;
+ }
+
+ if (thr->run_node == NULL) {
+ thr->run_node = que_node_get_parent(node);
+ }
+
+ return(thr);
+}
+
+/**************************************************************************
+Performs an execution step of a procedure call node. */
+UNIV_INLINE
+que_thr_t*
+proc_eval_step(
+/*===========*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr) /* in: query thread */
+{
+ func_node_t* node;
+
+ ut_ad(thr);
+
+ node = thr->run_node;
+ ut_ad(que_node_get_type(node) == QUE_NODE_FUNC);
+
+ /* Evaluate the procedure */
+
+ eval_exp(node);
+
+ thr->run_node = que_node_get_parent(node);
+
+ return(thr);
+}
diff --git a/innobase/include/fil0fil.h b/innobase/include/fil0fil.h
new file mode 100644
index 00000000000..9905b5a2c3c
--- /dev/null
+++ b/innobase/include/fil0fil.h
@@ -0,0 +1,357 @@
+/******************************************************
+The low-level file system
+
+(c) 1995 Innobase Oy
+
+Created 10/25/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef fil0fil_h
+#define fil0fil_h
+
+#include "univ.i"
+#include "sync0rw.h"
+#include "dict0types.h"
+#include "ibuf0types.h"
+#include "ut0byte.h"
+#include "os0file.h"
+
+/* 'null' (undefined) page offset in the context of file spaces */
+#define FIL_NULL ULINT32_UNDEFINED
+
+/* Space address data type; this is intended to be used when
+addresses accurate to a byte are stored in file pages. If the page part
+of the address is FIL_NULL, the address is considered undefined. */
+
+typedef byte fil_faddr_t; /* 'type' definition in C: an address
+ stored in a file page is a string of bytes */
+#define FIL_ADDR_PAGE	0	/* first in the address is the page offset */
+#define	FIL_ADDR_BYTE	4	/* then comes the 2-byte byte offset within
+				the page */
+
+#define FIL_ADDR_SIZE 6 /* address size is 6 bytes */
+
+/* A struct for storing a space address FIL_ADDR, when it is used
+in C program data structures. */
+
+typedef struct fil_addr_struct fil_addr_t;
+struct fil_addr_struct{
+ ulint page; /* page number within a space */
+ ulint boffset; /* byte offset within the page */
+};
+
+/* Null file address */
+extern fil_addr_t fil_addr_null;
+
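+/* An illustrative sketch (the function name is hypothetical; mach_write_to_4
+and mach_write_to_2 are assumed from mach0data.h) of how a fil_addr_t is
+stored into the FIL_ADDR_SIZE bytes of a file page, using the offsets
+defined above: */
+UNIV_INLINE
+void
+fil_addr_write_example(
+/*===================*/
+	fil_faddr_t*	faddr,	/* in: where to write, in a file page */
+	fil_addr_t	addr)	/* in: address to store */
+{
+	mach_write_to_4(faddr + FIL_ADDR_PAGE, addr.page);
+	mach_write_to_2(faddr + FIL_ADDR_BYTE, addr.boffset);
+}
+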
+/* The byte offsets on a file page for various variables */
+#define FIL_PAGE_SPACE 0 /* space id the page belongs to */
+#define FIL_PAGE_OFFSET 4 /* page offset inside space */
+#define FIL_PAGE_PREV 8 /* if there is a 'natural' predecessor
+ of the page, its offset */
+#define FIL_PAGE_NEXT 12 /* if there is a 'natural' successor
+ of the page, its offset */
+#define FIL_PAGE_LSN 16 /* lsn of the end of the newest
+ modification log record to the page */
+#define FIL_PAGE_TYPE 24 /* file page type: FIL_PAGE_INDEX,...,
+ 2 bytes */
+#define FIL_PAGE_FILE_FLUSH_LSN 26 /* this is only defined for the
+ first page in a data file: the file
+ has been flushed to disk at least up
+ to this lsn */
+#define FIL_PAGE_ARCH_LOG_NO 34 /* this is only defined for the
+ first page in a data file: the latest
+ archived log file number when the
+ flush lsn above was written */
+#define FIL_PAGE_DATA 38 /* start of the data on the page */
+
+/* File page trailer */
+#define FIL_PAGE_END_LSN 8 /* this should be same as
+ FIL_PAGE_LSN */
+#define FIL_PAGE_DATA_END 8
+
+/* File page types */
+#define FIL_PAGE_INDEX 17855
+#define FIL_PAGE_UNDO_LOG 2
+
+/* Space types */
+#define FIL_TABLESPACE 501
+#define FIL_LOG 502
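+
+/* An illustrative sketch (the function name is hypothetical; mach_read_from_4
+is assumed from mach0data.h) of how the header offsets above are used: the
+other 4-byte fields are read the same way, and the 2-byte FIL_PAGE_TYPE with
+mach_read_from_2. */
+UNIV_INLINE
+ulint
+fil_page_example_get_space_id(
+/*==========================*/
+			/* out: space id stored in the page header */
+	byte*	page)	/* in: file page frame */
+{
+	return(mach_read_from_4(page + FIL_PAGE_SPACE));
+}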
+
+/***********************************************************************
+Reserves a right to open a single file. The right must be released with
+fil_release_right_to_open. */
+
+void
+fil_reserve_right_to_open(void);
+/*===========================*/
+/***********************************************************************
+Releases a right to open a single file. */
+
+void
+fil_release_right_to_open(void);
+/*===========================*/
+/************************************************************************
+Returns TRUE if file address is undefined. */
+ibool
+fil_addr_is_null(
+/*=============*/
+ /* out: TRUE if undefined */
+ fil_addr_t addr); /* in: address */
+/********************************************************************
+Initializes the file system of this module. */
+
+void
+fil_init(
+/*=====*/
+ ulint max_n_open); /* in: max number of open files */
+/********************************************************************
+Initializes the ibuf indexes at a database start. This can be called
+after the file space headers have been created and the dictionary system
+has been initialized. */
+
+void
+fil_ibuf_init_at_db_start(void);
+/*===========================*/
+/***********************************************************************
+Creates a space object and puts it to the file system. */
+
+void
+fil_space_create(
+/*=============*/
+ char* name, /* in: space name */
+ ulint id, /* in: space id */
+ ulint purpose);/* in: FIL_TABLESPACE, or FIL_LOG if log */
+/********************************************************************
+Drops files from the start of a file space, so that its size is cut by
+the amount given. */
+
+void
+fil_space_truncate_start(
+/*=====================*/
+ ulint id, /* in: space id */
+ ulint trunc_len); /* in: truncate by this much; it is an error
+			if this does not equal the combined size of
+ some initial files in the space */
+/***********************************************************************
+Frees a space object from a file system. Closes the files in the chain
+but does not delete them. */
+
+void
+fil_space_free(
+/*===========*/
+ ulint id); /* in: space id */
+/***********************************************************************
+Returns the latch of a file space. */
+
+rw_lock_t*
+fil_space_get_latch(
+/*================*/
+ /* out: latch protecting storage allocation */
+ ulint id); /* in: space id */
+/***********************************************************************
+Returns the type of a file space. */
+
+ulint
+fil_space_get_type(
+/*===============*/
+ /* out: FIL_TABLESPACE or FIL_LOG */
+ ulint id); /* in: space id */
+/********************************************************************
+Writes the flushed lsn and the latest archived log number to the page
+header of the first page of each data file. */
+
+ulint
+fil_write_flushed_lsn_to_data_files(
+/*================================*/
+ /* out: DB_SUCCESS or error number */
+ dulint lsn, /* in: lsn to write */
+ ulint arch_log_no); /* in: latest archived log file number */
+/***********************************************************************
+Reads the flushed lsn and arch no fields from a data file at database
+startup. */
+
+void
+fil_read_flushed_lsn_and_arch_log_no(
+/*=================================*/
+ os_file_t data_file, /* in: open data file */
+ ibool one_read_already, /* in: TRUE if min and max parameters
+ below already contain sensible data */
+ dulint* min_flushed_lsn, /* in/out: */
+ ulint* min_arch_log_no, /* in/out: */
+ dulint* max_flushed_lsn, /* in/out: */
+ ulint* max_arch_log_no); /* in/out: */
+/***********************************************************************
+Returns the ibuf data of a file space. */
+
+ibuf_data_t*
+fil_space_get_ibuf_data(
+/*====================*/
+ /* out: ibuf data for this space */
+ ulint id); /* in: space id */
+/***********************************************************************
+Returns the size of the space in pages. */
+
+ulint
+fil_space_get_size(
+/*===============*/
+ /* out: space size */
+ ulint id); /* in: space id */
+/***********************************************************************
+Appends a new file to the chain of files of a space.
+File must be closed. */
+
+void
+fil_node_create(
+/*============*/
+ char* name, /* in: file name (file must be closed) */
+ ulint size, /* in: file size in database blocks, rounded downwards
+ to an integer */
+ ulint id); /* in: space id where to append */
+/************************************************************************
+Reads or writes data. This operation is asynchronous (aio). */
+
+void
+fil_io(
+/*===*/
+ ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE,
+ ORed to OS_FILE_LOG, if a log i/o
+ and ORed to OS_AIO_SIMULATED_WAKE_LATER
+ if simulated aio and we want to post a
+ batch of i/os; NOTE that a simulated batch
+ may introduce hidden chances of deadlocks,
+ because i/os are not actually handled until
+ all have been posted: use with great
+ caution! */
+ ibool sync, /* in: TRUE if synchronous aio is desired */
+ ulint space_id, /* in: space id */
+ ulint block_offset, /* in: offset in number of blocks */
+ ulint byte_offset, /* in: remainder of offset in bytes; in
+ aio this must be divisible by the OS block
+ size */
+ ulint len, /* in: how many bytes to read; this must
+ not cross a file boundary; in aio this must
+ be a block size multiple */
+ void* buf, /* in/out: buffer where to store read data
+ or from where to write; in aio this must be
+ appropriately aligned */
+ void* message); /* in: message for aio handler if non-sync
+ aio used, else ignored */
+/************************************************************************
+Reads data from a space to a buffer. Remember that the possible incomplete
+blocks at the end of a file are ignored: they are not taken into account when
+calculating the byte offset within a space. */
+
+void
+fil_read(
+/*=====*/
+ ibool sync, /* in: TRUE if synchronous aio is desired */
+ ulint space_id, /* in: space id */
+ ulint block_offset, /* in: offset in number of blocks */
+ ulint byte_offset, /* in: remainder of offset in bytes; in aio
+ this must be divisible by the OS block size */
+ ulint len, /* in: how many bytes to read; this must not
+ cross a file boundary; in aio this must be a
+ block size multiple */
+ void* buf, /* in/out: buffer where to store data read;
+ in aio this must be appropriately aligned */
+ void* message); /* in: message for aio handler if non-sync
+ aio used, else ignored */
+/************************************************************************
+Writes data to a space from a buffer. Remember that the possible incomplete
+blocks at the end of a file are ignored: they are not taken into account when
+calculating the byte offset within a space. */
+
+void
+fil_write(
+/*======*/
+ ibool sync, /* in: TRUE if synchronous aio is desired */
+ ulint space_id, /* in: space id */
+ ulint block_offset, /* in: offset in number of blocks */
+ ulint byte_offset, /* in: remainder of offset in bytes; in aio
+ this must be divisible by the OS block size */
+ ulint len, /* in: how many bytes to write; this must
+ not cross a file boundary; in aio this must
+ be a block size multiple */
+ void* buf, /* in: buffer from which to write; in aio
+ this must be appropriately aligned */
+ void* message); /* in: message for aio handler if non-sync
+ aio used, else ignored */
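+
+/* An illustrative usage sketch (the function name is hypothetical): a
+synchronous read of one full page using fil_read above. */
+UNIV_INLINE
+void
+fil_read_page_example(
+/*==================*/
+	ulint	space_id,	/* in: space id */
+	ulint	page_no,	/* in: page number within the space */
+	byte*	buf)		/* in/out: buffer of UNIV_PAGE_SIZE bytes,
+				suitably aligned for aio */
+{
+	fil_read(TRUE,		/* synchronous: wait until the read is done */
+		 space_id,
+		 page_no,	/* block offset = page number */
+		 0,		/* byte offset within the block */
+		 UNIV_PAGE_SIZE,
+		 buf,
+		 NULL);		/* message is ignored for synchronous i/o */
+}
+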
+/**************************************************************************
+Waits for an aio operation to complete. This function is used by the
+i/o-handler threads to wait for and then handle completed requests. The
+aio array of pending requests is divided
+into segments (see os0file.c for more info). The thread specifies which
+segment it wants to wait for. */
+
+void
+fil_aio_wait(
+/*=========*/
+ ulint segment); /* in: the number of the segment in the aio
+ array to wait for */
+/**************************************************************************
+Flushes to disk possible writes cached by the OS. */
+
+void
+fil_flush(
+/*======*/
+ ulint space_id); /* in: file space id (this can be a group of
+ log files or a tablespace of the database) */
+/**************************************************************************
+Flushes to disk writes in file spaces of the given type possibly cached by
+the OS. */
+
+void
+fil_flush_file_spaces(
+/*==================*/
+ ulint purpose); /* in: FIL_TABLESPACE, FIL_LOG */
+/**********************************************************************
+Checks the consistency of the file system. */
+
+ibool
+fil_validate(void);
+/*==============*/
+ /* out: TRUE if ok */
+/************************************************************************
+Accessor functions for a file page */
+
+ulint
+fil_page_get_prev(byte* page);
+ulint
+fil_page_get_next(byte* page);
+/*************************************************************************
+Sets the file page type. */
+
+void
+fil_page_set_type(
+/*==============*/
+ byte* page, /* in: file page */
+ ulint type); /* in: type */
+/*************************************************************************
+Gets the file page type. */
+
+ulint
+fil_page_get_type(
+/*==============*/
+ /* out: type; NOTE that if the type has not been
+			written to the page, the return value is not defined */
+ byte* page); /* in: file page */
+/***********************************************************************
+Tries to reserve free extents in a file space. */
+
+ibool
+fil_space_reserve_free_extents(
+/*===========================*/
+			/* out: TRUE if success */
+ ulint id, /* in: space id */
+ ulint n_free_now, /* in: number of free extents now */
+ ulint n_to_reserve); /* in: how many one wants to reserve */
+/***********************************************************************
+Releases free extents in a file space. */
+
+void
+fil_space_release_free_extents(
+/*===========================*/
+ ulint id, /* in: space id */
+ ulint n_reserved); /* in: how many one reserved */
+
+typedef struct fil_space_struct fil_space_t;
+
+#endif
diff --git a/innobase/include/fsp0fsp.h b/innobase/include/fsp0fsp.h
new file mode 100644
index 00000000000..f1be4de4d40
--- /dev/null
+++ b/innobase/include/fsp0fsp.h
@@ -0,0 +1,331 @@
+/******************************************************
+File space management
+
+(c) 1995 Innobase Oy
+
+Created 12/18/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef fsp0fsp_h
+#define fsp0fsp_h
+
+#include "univ.i"
+
+#include "mtr0mtr.h"
+#include "fut0lst.h"
+#include "ut0byte.h"
+#include "page0types.h"
+
+/* If records are inserted in order, there are the following
+flags to tell this (their type is made byte for the compiler
+to warn if direction and hint parameters are switched in
+fseg_alloc_free_page): */
+#define FSP_UP ((byte)111) /* alphabetically upwards */
+#define FSP_DOWN ((byte)112) /* alphabetically downwards */
+#define FSP_NO_DIR ((byte)113) /* no order */
+
+/* File space extent size in pages */
+#define FSP_EXTENT_SIZE 64
+
+/* On a page of any file segment, data may be put starting from this offset: */
+#define FSEG_PAGE_DATA FIL_PAGE_DATA
+
+/* File segment header which points to the inode describing the file segment */
+typedef byte fseg_header_t;
+
+#define FSEG_HDR_SPACE 0 /* space id of the inode */
+#define FSEG_HDR_PAGE_NO 4 /* page number of the inode */
+#define FSEG_HDR_OFFSET 8 /* byte offset of the inode */
+
+#define FSEG_HEADER_SIZE 10
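+
+/* An illustrative sketch (the function name is hypothetical; the mach_read
+functions are assumed from mach0data.h) of how the inode address stored in a
+segment header is read field by field; note that 4 + 4 + 2 bytes gives
+FSEG_HEADER_SIZE: */
+UNIV_INLINE
+void
+fseg_header_read_example(
+/*=====================*/
+	fseg_header_t*	header,	/* in: segment header */
+	ulint*		space,	/* out: space id of the inode */
+	ulint*		page_no,/* out: page number of the inode */
+	ulint*		offset)	/* out: byte offset of the inode */
+{
+	*space	 = mach_read_from_4(header + FSEG_HDR_SPACE);
+	*page_no = mach_read_from_4(header + FSEG_HDR_PAGE_NO);
+	*offset	 = mach_read_from_2(header + FSEG_HDR_OFFSET);
+}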
+
+/**************************************************************************
+Initializes the file space system. */
+
+void
+fsp_init(void);
+/*==========*/
+/**************************************************************************
+Initializes the space header of a new created space. */
+
+void
+fsp_header_init(
+/*============*/
+ ulint space, /* in: space id */
+ ulint size, /* in: current size in blocks */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/**************************************************************************
+Increases the space size field of a space. */
+
+void
+fsp_header_inc_size(
+/*================*/
+ ulint space, /* in: space id */
+ ulint size_inc,/* in: size increment in pages */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/**************************************************************************
+Creates a new segment. */
+
+page_t*
+fseg_create(
+/*========*/
+ /* out: the page where the segment header is placed,
+ x-latched, FIL_NULL if could not create segment
+ because of lack of space */
+ ulint space, /* in: space id */
+ ulint page, /* in: page where the segment header is placed: if
+ this is != 0, the page must belong to another segment,
+ if this is 0, a new page will be allocated and it
+ will belong to the created segment */
+ ulint byte_offset, /* in: byte offset of the created segment header
+ on the page */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************************
+Creates a new segment. */
+
+page_t*
+fseg_create_general(
+/*================*/
+ /* out: the page where the segment header is placed,
+ x-latched, NULL if could not create segment
+ because of lack of space */
+ ulint space, /* in: space id */
+ ulint page, /* in: page where the segment header is placed: if
+ this is != 0, the page must belong to another segment,
+ if this is 0, a new page will be allocated and it
+ will belong to the created segment */
+ ulint byte_offset, /* in: byte offset of the created segment header
+ on the page */
+	ibool	has_done_reservation, /* in: TRUE if the caller has
+			already done the reservation for the pages
+			with fsp_reserve_free_extents (at least 2 extents:
+			one for the inode and the other for the
+			segment); in that case there is no need to do
+			the check for this individual operation */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************************
+Calculates the number of pages reserved by a segment, and how many pages are
+currently used. */
+
+ulint
+fseg_n_reserved_pages(
+/*==================*/
+ /* out: number of reserved pages */
+ fseg_header_t* header, /* in: segment header */
+ ulint* used, /* out: number of pages used (<= reserved) */
+ mtr_t* mtr); /* in: mtr handle */
+/**************************************************************************
+Allocates a single free page from a segment. This function implements
+the intelligent allocation strategy which tries to minimize
+file space fragmentation. */
+
+ulint
+fseg_alloc_free_page(
+/*=================*/
+ /* out: the allocated page offset
+ FIL_NULL if no page could be allocated */
+ fseg_header_t* seg_header, /* in: segment header */
+ ulint hint, /* in: hint of which page would be desirable */
+ byte direction, /* in: if the new page is needed because
+ of an index page split, and records are
+ inserted there in order, into which
+ direction they go alphabetically: FSP_DOWN,
+ FSP_UP, FSP_NO_DIR */
+ mtr_t* mtr); /* in: mtr handle */
+/**************************************************************************
+Allocates a single free page from a segment. This function implements
+the intelligent allocation strategy which tries to minimize file space
+fragmentation. */
+
+ulint
+fseg_alloc_free_page_general(
+/*=========================*/
+ /* out: allocated page offset, FIL_NULL if no
+ page could be allocated */
+ fseg_header_t* seg_header,/* in: segment header */
+ ulint hint, /* in: hint of which page would be desirable */
+ byte direction,/* in: if the new page is needed because
+ of an index page split, and records are
+ inserted there in order, into which
+ direction they go alphabetically: FSP_DOWN,
+ FSP_UP, FSP_NO_DIR */
+ ibool has_done_reservation, /* in: TRUE if the caller has
+ already done the reservation for the page
+ with fsp_reserve_free_extents, then there
+ is no need to do the check for this individual
+ page */
+ mtr_t* mtr); /* in: mtr handle */
+/**************************************************************************
+Reserves free pages from a tablespace. All mini-transactions which may
+use several pages from the tablespace should call this function beforehand
+and reserve enough free extents so that they certainly will be able
+to do their operation, like a B-tree page split, fully. Reservations
+must be released with function fil_space_release_free_extents!
+
+The alloc_type below has the following meaning: FSP_NORMAL means an
+operation which will probably result in more space usage, like an
+insert in a B-tree; FSP_UNDO means allocation to undo logs: if we are
+deleting rows, then this allocation will in the long run result in
+less space usage (after a purge); FSP_CLEANING means allocation done
+in a physical record delete (like in a purge) or other cleaning operation
+which will result in less space usage in the long run. We prefer the latter
+two types of allocation: when space is scarce, FSP_NORMAL allocations
+will not succeed, but the latter two allocations will succeed, if possible.
+The purpose is to avoid a dead end where the database is full but the
+user cannot free any space because these freeing operations temporarily
+reserve some space. */
+
+ibool
+fsp_reserve_free_extents(
+/*=====================*/
+ /* out: TRUE if we were able to make the reservation */
+ ulint space, /* in: space id */
+ ulint n_ext, /* in: number of extents to reserve */
+ ulint alloc_type,/* in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
+ mtr_t* mtr); /* in: mtr */
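+
+/* An illustrative sketch (the function name is hypothetical) of the
+reserve/operate/release pattern described above: */
+UNIV_INLINE
+ibool
+fsp_reserve_example(
+/*================*/
+			/* out: TRUE if the operation could be carried out */
+	ulint	space,	/* in: space id */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	if (!fsp_reserve_free_extents(space, 2, FSP_NORMAL, mtr)) {
+
+		return(FALSE);	/* treat as out of space: FSP_NORMAL
+				allocations are the first to fail when the
+				tablespace is nearly full */
+	}
+
+	/* ... do the multi-page operation, e.g., a B-tree page split,
+	which may use at most the reserved extents ... */
+
+	fil_space_release_free_extents(space, 2);
+
+	return(TRUE);
+}
+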
+/**************************************************************************
+This function should be used to get information on how much new data we can
+still insert into the database without running out of tablespace. Only free
+extents are taken into account and we also subtract
+the safety margin required by the above function fsp_reserve_free_extents. */
+
+ulint
+fsp_get_available_space_in_free_extents(
+/*====================================*/
+ /* out: available space in kB */
+ ulint space); /* in: space id */
+/**************************************************************************
+Frees a single page of a segment. */
+
+void
+fseg_free_page(
+/*===========*/
+ fseg_header_t* seg_header, /* in: segment header */
+ ulint space, /* in: space id */
+ ulint page, /* in: page offset */
+ mtr_t* mtr); /* in: mtr handle */
+/***********************************************************************
+Frees a segment. The freeing is performed in several mini-transactions,
+so that there is no danger of bufferfixing too many buffer pages. */
+
+void
+fseg_free(
+/*======*/
+ ulint space, /* in: space id */
+ ulint page_no,/* in: page number where the segment header is
+ placed */
+ ulint offset);/* in: byte offset of the segment header on that
+ page */
+/**************************************************************************
+Frees part of a segment. This function can be used to free a segment
+by repeatedly calling this function in different mini-transactions.
+Doing the freeing in a single mini-transaction might result in
+too big a mini-transaction. */
+
+ibool
+fseg_free_step(
+/*===========*/
+ /* out: TRUE if freeing completed */
+ fseg_header_t* header, /* in, own: segment header; NOTE: if the header
+ resides on the first page of the frag list
+ of the segment, this pointer becomes obsolete
+ after the last freeing step */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************************
+Frees part of a segment. Differs from fseg_free_step because this function
+leaves the header page unfreed. */
+
+ibool
+fseg_free_step_not_header(
+/*======================*/
+ /* out: TRUE if freeing completed, except the
+ header page */
+ fseg_header_t* header, /* in: segment header which must reside on
+ the first fragment page of the segment */
+ mtr_t* mtr); /* in: mtr */
+/***************************************************************************
+Checks if a page address is an extent descriptor page address. */
+UNIV_INLINE
+ibool
+fsp_descr_page(
+/*===========*/
+ /* out: TRUE if a descriptor page */
+ ulint page_no);/* in: page number */
+/***************************************************************
+Parses a redo log record of a file page init. */
+
+byte*
+fsp_parse_init_file_page(
+/*=====================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page); /* in: page or NULL */
+/***********************************************************************
+Validates the file space system and its segments. */
+
+ibool
+fsp_validate(
+/*=========*/
+ /* out: TRUE if ok */
+ ulint space); /* in: space id */
+/***********************************************************************
+Prints info of a file space. */
+
+void
+fsp_print(
+/*======*/
+ ulint space); /* in: space id */
+/***********************************************************************
+Validates a segment. */
+
+ibool
+fseg_validate(
+/*==========*/
+ /* out: TRUE if ok */
+ fseg_header_t* header, /* in: segment header */
+ mtr_t* mtr2); /* in: mtr */
+/***********************************************************************
+Writes info of a segment. */
+
+void
+fseg_print(
+/*=======*/
+ fseg_header_t* header, /* in: segment header */
+ mtr_t* mtr); /* in: mtr */
+
+/* Flags for fsp_reserve_free_extents */
+#define FSP_NORMAL 1000000
+#define FSP_UNDO 2000000
+#define FSP_CLEANING 3000000
+
+/* Number of pages described in a single descriptor page: currently each page
+description takes less than 1 byte; a descriptor page is repeated every
+this many file pages */
+#define XDES_DESCRIBED_PER_PAGE UNIV_PAGE_SIZE
+
+/* The space low address page map, and also offsets for extent descriptor and
+bitmap pages, which are always repeated after XDES_DESCRIBED_PER_PAGE more
+pages: */
+/*--------------------------------------*/
+#define FSP_XDES_OFFSET 0
+#define FSP_IBUF_BITMAP_OFFSET 1
+ /* The ibuf bitmap pages are the ones whose
+ page number is the number above plus a
+ multiple of XDES_DESCRIBED_PER_PAGE */
+#define FSP_FIRST_INODE_PAGE_NO 2
+#define FSP_IBUF_HEADER_PAGE_NO 3
+#define FSP_IBUF_TREE_ROOT_PAGE_NO 4
+ /* The ibuf tree root page number in each
+ tablespace; its fseg inode is on the page
+ number FSP_FIRST_INODE_PAGE_NO */
+#define FSP_TRX_SYS_PAGE_NO 5
+#define FSP_FIRST_RSEG_PAGE_NO 6
+#define FSP_DICT_HDR_PAGE_NO 7
+/*--------------------------------------*/
+
+#ifndef UNIV_NONINL
+#include "fsp0fsp.ic"
+#endif
+
+#endif
diff --git a/innobase/include/fsp0fsp.ic b/innobase/include/fsp0fsp.ic
new file mode 100644
index 00000000000..89cd9263bd6
--- /dev/null
+++ b/innobase/include/fsp0fsp.ic
@@ -0,0 +1,24 @@
+/******************************************************
+File space management
+
+(c) 1995 Innobase Oy
+
+Created 12/18/1995 Heikki Tuuri
+*******************************************************/
+
+/***************************************************************************
+Checks if a page address is an extent descriptor page address. */
+UNIV_INLINE
+ibool
+fsp_descr_page(
+/*===========*/
+ /* out: TRUE if a descriptor page */
+ ulint page_no)/* in: page number */
+{
+ if (page_no % XDES_DESCRIBED_PER_PAGE == FSP_XDES_OFFSET) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
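+
+/* An illustrative sketch (the function name is hypothetical): page 0 and
+every page whose number is a multiple of XDES_DESCRIBED_PER_PAGE is an
+extent descriptor page. */
+UNIV_INLINE
+void
+fsp_descr_page_example(void)
+/*========================*/
+{
+	ut_a(fsp_descr_page(0));
+	ut_a(!fsp_descr_page(1));
+	ut_a(!fsp_descr_page(FSP_FIRST_INODE_PAGE_NO));
+	ut_a(fsp_descr_page(XDES_DESCRIBED_PER_PAGE));
+	ut_a(fsp_descr_page(2 * XDES_DESCRIBED_PER_PAGE));
+}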
diff --git a/innobase/include/fut0fut.h b/innobase/include/fut0fut.h
new file mode 100644
index 00000000000..b9546b4e1a0
--- /dev/null
+++ b/innobase/include/fut0fut.h
@@ -0,0 +1,36 @@
+/**********************************************************************
+File-based utilities
+
+(c) 1995 Innobase Oy
+
+Created 12/13/1995 Heikki Tuuri
+***********************************************************************/
+
+
+#ifndef fut0fut_h
+#define fut0fut_h
+
+#include "univ.i"
+
+#include "fil0fil.h"
+#include "mtr0mtr.h"
+
+/************************************************************************
+Gets a pointer to a file address and latches the page. */
+UNIV_INLINE
+byte*
+fut_get_ptr(
+/*========*/
+ /* out: pointer to a byte in a frame; the file
+ page in the frame is bufferfixed and latched */
+ ulint space, /* in: space id */
+ fil_addr_t addr, /* in: file address */
+ ulint rw_latch, /* in: RW_S_LATCH, RW_X_LATCH */
+ mtr_t* mtr); /* in: mtr handle */
+
+#ifndef UNIV_NONINL
+#include "fut0fut.ic"
+#endif
+
+#endif
+
diff --git a/innobase/include/fut0fut.ic b/innobase/include/fut0fut.ic
new file mode 100644
index 00000000000..0f1aa9dd9ae
--- /dev/null
+++ b/innobase/include/fut0fut.ic
@@ -0,0 +1,36 @@
+/**********************************************************************
+File-based utilities
+
+(c) 1995 Innobase Oy
+
+Created 12/13/1995 Heikki Tuuri
+***********************************************************************/
+
+#include "sync0rw.h"
+#include "buf0buf.h"
+
+/************************************************************************
+Gets a pointer to a file address and latches the page. */
+UNIV_INLINE
+byte*
+fut_get_ptr(
+/*========*/
+ /* out: pointer to a byte in a frame; the file
+ page in the frame is bufferfixed and latched */
+ ulint space, /* in: space id */
+ fil_addr_t addr, /* in: file address */
+ ulint rw_latch, /* in: RW_S_LATCH, RW_X_LATCH */
+ mtr_t* mtr) /* in: mtr handle */
+{
+ byte* ptr;
+
+ ut_ad(mtr);
+ ut_ad(addr.boffset < UNIV_PAGE_SIZE);
+ ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
+
+ ptr = buf_page_get(space, addr.page, rw_latch, mtr) + addr.boffset;
+
+ buf_page_dbg_add_level(ptr, SYNC_NO_ORDER_CHECK);
+
+ return(ptr);
+}
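+
+/* An illustrative usage sketch (the function name is hypothetical): reads
+one byte at a file address under an s-latch, inside its own
+mini-transaction. */
+UNIV_INLINE
+ulint
+fut_read_byte_example(
+/*==================*/
+				/* out: value of the byte at addr */
+	ulint		space,	/* in: space id */
+	fil_addr_t	addr)	/* in: file address of the byte */
+{
+	mtr_t	mtr;
+	byte*	ptr;
+	ulint	val;
+
+	mtr_start(&mtr);
+
+	ptr = fut_get_ptr(space, addr, RW_S_LATCH, &mtr);
+
+	val = (ulint)(*ptr);	/* the pointer is valid only while the page
+				is latched, i.e., until the mtr commits */
+	mtr_commit(&mtr);
+
+	return(val);
+}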
diff --git a/innobase/include/fut0lst.h b/innobase/include/fut0lst.h
new file mode 100644
index 00000000000..5427e2248da
--- /dev/null
+++ b/innobase/include/fut0lst.h
@@ -0,0 +1,198 @@
+/**********************************************************************
+File-based list utilities
+
+(c) 1995 Innobase Oy
+
+Created 11/28/1995 Heikki Tuuri
+***********************************************************************/
+
+#ifndef fut0lst_h
+#define fut0lst_h
+
+#include "univ.i"
+
+#include "fil0fil.h"
+#include "mtr0mtr.h"
+
+
+/* The C 'types' of base node and list node: these should be used to
+write self-documenting code. Of course, the sizeof macro cannot be
+applied to these types! */
+
+typedef byte flst_base_node_t;
+typedef byte flst_node_t;
+
+/* The physical size of a list base node in bytes */
+#define FLST_BASE_NODE_SIZE (4 + 2 * FIL_ADDR_SIZE)
+
+/* The physical size of a list node in bytes */
+#define FLST_NODE_SIZE (2 * FIL_ADDR_SIZE)
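+
+/* Layout note: a base node stores the 4-byte list length followed by the
+6-byte addresses of the first and the last node (4 + 2 * FIL_ADDR_SIZE
+bytes), and a list node stores the 6-byte addresses of the previous and the
+next node (2 * FIL_ADDR_SIZE bytes). */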
+
+
+/************************************************************************
+Initializes a list base node. */
+UNIV_INLINE
+void
+flst_init(
+/*======*/
+ flst_base_node_t* base, /* in: pointer to base node */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Adds a node as the last node in a list. */
+
+void
+flst_add_last(
+/*==========*/
+ flst_base_node_t* base, /* in: pointer to base node of list */
+ flst_node_t* node, /* in: node to add */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Adds a node as the first node in a list. */
+
+void
+flst_add_first(
+/*===========*/
+ flst_base_node_t* base, /* in: pointer to base node of list */
+ flst_node_t* node, /* in: node to add */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Inserts a node after another in a list. */
+
+void
+flst_insert_after(
+/*==============*/
+ flst_base_node_t* base, /* in: pointer to base node of list */
+ flst_node_t* node1, /* in: node to insert after */
+ flst_node_t* node2, /* in: node to add */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Inserts a node before another in a list. */
+
+void
+flst_insert_before(
+/*===============*/
+ flst_base_node_t* base, /* in: pointer to base node of list */
+ flst_node_t* node2, /* in: node to insert */
+ flst_node_t* node3, /* in: node to insert before */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Removes a node. */
+
+void
+flst_remove(
+/*========*/
+ flst_base_node_t* base, /* in: pointer to base node of list */
+ flst_node_t* node2, /* in: node to remove */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Cuts off the tail of the list, including the node given. The number of
+nodes which will be removed must be provided by the caller, as this function
+does not measure the length of the tail. */
+
+void
+flst_cut_end(
+/*=========*/
+ flst_base_node_t* base, /* in: pointer to base node of list */
+ flst_node_t* node2, /* in: first node to remove */
+ ulint n_nodes,/* in: number of nodes to remove,
+ must be >= 1 */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Cuts off the tail of the list, not including the given node. The number of
+nodes which will be removed must be provided by the caller, as this function
+does not measure the length of the tail. */
+
+void
+flst_truncate_end(
+/*==============*/
+ flst_base_node_t* base, /* in: pointer to base node of list */
+ flst_node_t* node2, /* in: first node not to remove */
+ ulint n_nodes,/* in: number of nodes to remove */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Gets list length. */
+UNIV_INLINE
+ulint
+flst_get_len(
+/*=========*/
+ /* out: length */
+ flst_base_node_t* base, /* in: pointer to base node */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Gets list first node address. */
+UNIV_INLINE
+fil_addr_t
+flst_get_first(
+/*===========*/
+ /* out: file address */
+ flst_base_node_t* base, /* in: pointer to base node */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Gets list last node address. */
+UNIV_INLINE
+fil_addr_t
+flst_get_last(
+/*==========*/
+ /* out: file address */
+ flst_base_node_t* base, /* in: pointer to base node */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Gets list next node address. */
+UNIV_INLINE
+fil_addr_t
+flst_get_next_addr(
+/*===============*/
+ /* out: file address */
+ flst_node_t* node, /* in: pointer to node */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Gets list prev node address. */
+UNIV_INLINE
+fil_addr_t
+flst_get_prev_addr(
+/*===============*/
+ /* out: file address */
+ flst_node_t* node, /* in: pointer to node */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Writes a file address. */
+UNIV_INLINE
+void
+flst_write_addr(
+/*============*/
+	fil_faddr_t*	faddr,	/* in: pointer to file address */
+ fil_addr_t addr, /* in: file address */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Reads a file address. */
+UNIV_INLINE
+fil_addr_t
+flst_read_addr(
+/*===========*/
+ /* out: file address */
+	fil_faddr_t*	faddr,	/* in: pointer to file address */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************************
+Validates a file-based list. */
+
+ibool
+flst_validate(
+/*==========*/
+ /* out: TRUE if ok */
+ flst_base_node_t* base, /* in: pointer to base node of list */
+ mtr_t* mtr1); /* in: mtr */
+/************************************************************************
+Prints info of a file-based list. */
+
+void
+flst_print(
+/*=======*/
+ flst_base_node_t* base, /* in: pointer to base node of list */
+ mtr_t* mtr); /* in: mtr */
+
+
+#ifndef UNIV_NONINL
+#include "fut0lst.ic"
+#endif
+
+#endif
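
Worked size check (editorial note): a file address consists of a 4-byte page number and a 2-byte byte offset, i.e. FIL_ADDR_SIZE is 6 bytes (flst_write_addr() in fut0lst.ic below writes the two parts with MLOG_4BYTES and MLOG_2BYTES). Hence FLST_NODE_SIZE = 2 * 6 = 12 bytes (the prev and next addresses), and FLST_BASE_NODE_SIZE = 4 + 2 * 6 = 16 bytes (the 32-bit length field plus the first and last addresses).
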
diff --git a/innobase/include/fut0lst.ic b/innobase/include/fut0lst.ic
new file mode 100644
index 00000000000..d2e79cf7640
--- /dev/null
+++ b/innobase/include/fut0lst.ic
@@ -0,0 +1,147 @@
+/**********************************************************************
+File-based list utilities
+
+(c) 1995 Innobase Oy
+
+Created 11/28/1995 Heikki Tuuri
+***********************************************************************/
+
+#include "fut0fut.h"
+#include "mtr0log.h"
+#include "buf0buf.h"
+
+/* We define the field offsets of a node for the list */
+#define FLST_PREV 0 /* 6-byte address of the previous list element;
+ the page part of address is FIL_NULL, if no
+ previous element */
+#define FLST_NEXT FIL_ADDR_SIZE /* 6-byte address of the next
+ list element; the page part of address
+ is FIL_NULL, if no next element */
+
+/* We define the field offsets of a base node for the list */
+#define FLST_LEN 0 /* 32-bit list length field */
+#define FLST_FIRST 4 /* 6-byte address of the first element
+ of the list; undefined if empty list */
+#define FLST_LAST	(4 + FIL_ADDR_SIZE) /* 6-byte address of the
+					last element of the list; undefined
+					if empty list */
+
+/************************************************************************
+Writes a file address. */
+UNIV_INLINE
+void
+flst_write_addr(
+/*============*/
+	fil_faddr_t*	faddr,	/* in: pointer to file address */
+ fil_addr_t addr, /* in: file address */
+ mtr_t* mtr) /* in: mini-transaction handle */
+{
+ ut_ad(faddr && mtr);
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(faddr),
+ MTR_MEMO_PAGE_X_FIX));
+
+ mlog_write_ulint(faddr + FIL_ADDR_PAGE, addr.page, MLOG_4BYTES, mtr);
+ mlog_write_ulint(faddr + FIL_ADDR_BYTE, addr.boffset,
+ MLOG_2BYTES, mtr);
+}
+
+/************************************************************************
+Reads a file address. */
+UNIV_INLINE
+fil_addr_t
+flst_read_addr(
+/*===========*/
+ /* out: file address */
+	fil_faddr_t*	faddr,	/* in: pointer to file address */
+ mtr_t* mtr) /* in: mini-transaction handle */
+{
+ fil_addr_t addr;
+
+ ut_ad(faddr && mtr);
+
+ addr.page = mtr_read_ulint(faddr + FIL_ADDR_PAGE, MLOG_4BYTES, mtr);
+ addr.boffset = mtr_read_ulint(faddr + FIL_ADDR_BYTE, MLOG_2BYTES,
+ mtr);
+ return(addr);
+}
+
+/************************************************************************
+Initializes a list base node. */
+UNIV_INLINE
+void
+flst_init(
+/*======*/
+ flst_base_node_t* base, /* in: pointer to base node */
+ mtr_t* mtr) /* in: mini-transaction handle */
+{
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
+ MTR_MEMO_PAGE_X_FIX));
+ mlog_write_ulint(base + FLST_LEN, 0, MLOG_4BYTES, mtr);
+ flst_write_addr(base + FLST_FIRST, fil_addr_null, mtr);
+ flst_write_addr(base + FLST_LAST, fil_addr_null, mtr);
+}
+
+/************************************************************************
+Gets list length. */
+UNIV_INLINE
+ulint
+flst_get_len(
+/*=========*/
+ /* out: length */
+ flst_base_node_t* base, /* in: pointer to base node */
+ mtr_t* mtr) /* in: mini-transaction handle */
+{
+ return(mtr_read_ulint(base + FLST_LEN, MLOG_4BYTES, mtr));
+}
+
+/************************************************************************
+Gets list first node address. */
+UNIV_INLINE
+fil_addr_t
+flst_get_first(
+/*===========*/
+ /* out: file address */
+ flst_base_node_t* base, /* in: pointer to base node */
+ mtr_t* mtr) /* in: mini-transaction handle */
+{
+ return(flst_read_addr(base + FLST_FIRST, mtr));
+}
+
+/************************************************************************
+Gets list last node address. */
+UNIV_INLINE
+fil_addr_t
+flst_get_last(
+/*==========*/
+ /* out: file address */
+ flst_base_node_t* base, /* in: pointer to base node */
+ mtr_t* mtr) /* in: mini-transaction handle */
+{
+ return(flst_read_addr(base + FLST_LAST, mtr));
+}
+
+/************************************************************************
+Gets list next node address. */
+UNIV_INLINE
+fil_addr_t
+flst_get_next_addr(
+/*===============*/
+ /* out: file address */
+ flst_node_t* node, /* in: pointer to node */
+ mtr_t* mtr) /* in: mini-transaction handle */
+{
+ return(flst_read_addr(node + FLST_NEXT, mtr));
+}
+
+/************************************************************************
+Gets list prev node address. */
+UNIV_INLINE
+fil_addr_t
+flst_get_prev_addr(
+/*===============*/
+ /* out: file address */
+ flst_node_t* node, /* in: pointer to node */
+ mtr_t* mtr) /* in: mini-transaction handle */
+{
+ return(flst_read_addr(node + FLST_PREV, mtr));
+}
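
Traversal sketch (editorial, not part of the patch): walk a file-based list from its base node and count the nodes, which should agree with the stored FLST_LEN. mtr_start()/mtr_commit() are assumed from mtr0mtr.h; in real callers the base node usually lives on a page the caller has already latched, and a very long list would not be walked inside a single mini-transaction.

#include "fut0fut.h"
#include "fut0lst.h"
#include "mtr0mtr.h"

static ulint
example_count_list_nodes(
	ulint		space,		/* in: tablespace id */
	fil_addr_t	base_addr)	/* in: address of the list base node */
{
	mtr_t			mtr;
	flst_base_node_t*	base;
	flst_node_t*		node;
	fil_addr_t		addr;
	ulint			count	= 0;

	mtr_start(&mtr);		/* assumed mtr0mtr.h API */

	base = fut_get_ptr(space, base_addr, RW_S_LATCH, &mtr);
	addr = flst_get_first(base, &mtr);

	while (addr.page != FIL_NULL) {
		node = fut_get_ptr(space, addr, RW_S_LATCH, &mtr);
		count++;

		addr = flst_get_next_addr(node, &mtr);
	}

	ut_ad(count == flst_get_len(base, &mtr));

	mtr_commit(&mtr);

	return(count);
}
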
diff --git a/innobase/include/ha0ha.h b/innobase/include/ha0ha.h
new file mode 100644
index 00000000000..aeed7c32eff
--- /dev/null
+++ b/innobase/include/ha0ha.h
@@ -0,0 +1,137 @@
+/******************************************************
+The hash table with external chains
+
+(c) 1994-1997 Innobase Oy
+
+Created 8/18/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef ha0ha_h
+#define ha0ha_h
+
+#include "univ.i"
+
+#include "hash0hash.h"
+#include "page0types.h"
+
+/*****************************************************************
+Looks for an element in a hash table. */
+UNIV_INLINE
+void*
+ha_search_and_get_data(
+/*===================*/
+ /* out: pointer to the data of the first hash
+ table node in chain having the fold number,
+ NULL if not found */
+ hash_table_t* table, /* in: hash table */
+ ulint fold); /* in: folded value of the searched data */
+/*************************************************************
+Looks for an element when we know the pointer to the data and updates
+the pointer to data if found. */
+UNIV_INLINE
+void
+ha_search_and_update_if_found(
+/*==========================*/
+ hash_table_t* table, /* in: hash table */
+ ulint fold, /* in: folded value of the searched data */
+ void* data, /* in: pointer to the data */
+ void* new_data);/* in: new pointer to the data */
+/*****************************************************************
+Creates a hash table with >= n array cells. The actual number of cells is
+chosen to be a prime number slightly bigger than n. */
+
+hash_table_t*
+ha_create(
+/*======*/
+ /* out, own: created table */
+ ibool in_btr_search, /* in: TRUE if the hash table is used in
+ the btr_search module */
+ ulint n, /* in: number of array cells */
+ ulint n_mutexes, /* in: number of mutexes to protect the
+ hash table: must be a power of 2 */
+ ulint mutex_level); /* in: level of the mutexes in the latching
+ order: this is used in the debug version */
+/*****************************************************************
+Inserts an entry into a hash table. If an entry with the same fold number
+is found, its node is updated to point to the new data, and no new node
+is inserted. */
+
+ibool
+ha_insert_for_fold(
+/*===============*/
+ /* out: TRUE if succeed, FALSE if no more
+ memory could be allocated */
+ hash_table_t* table, /* in: hash table */
+ ulint fold, /* in: folded value of data; if a node with
+ the same fold value already exists, it is
+ updated to point to the same data, and no new
+ node is created! */
+ void* data); /* in: data, must not be NULL */
+/*****************************************************************
+Reserves the necessary hash table mutex and inserts an entry into the hash
+table. */
+UNIV_INLINE
+ibool
+ha_insert_for_fold_mutex(
+/*=====================*/
+ /* out: TRUE if succeed, FALSE if no more
+ memory could be allocated */
+ hash_table_t* table, /* in: hash table */
+ ulint fold, /* in: folded value of data; if a node with
+ the same fold value already exists, it is
+ updated to point to the same data, and no new
+ node is created! */
+ void* data); /* in: data, must not be NULL */
+/*****************************************************************
+Deletes an entry from a hash table. */
+
+void
+ha_delete(
+/*======*/
+ hash_table_t* table, /* in: hash table */
+ ulint fold, /* in: folded value of data */
+ void* data); /* in: data, must not be NULL and must exist
+ in the hash table */
+/*************************************************************
+Looks for an element when we know the pointer to the data and deletes
+it from the hash table if found. */
+UNIV_INLINE
+ibool
+ha_search_and_delete_if_found(
+/*==========================*/
+ /* out: TRUE if found */
+ hash_table_t* table, /* in: hash table */
+ ulint fold, /* in: folded value of the searched data */
+ void* data); /* in: pointer to the data */
+/*********************************************************************
+Removes from the chain determined by fold all nodes whose data pointer
+points to the page given. */
+
+void
+ha_remove_all_nodes_to_page(
+/*========================*/
+ hash_table_t* table, /* in: hash table */
+ ulint fold, /* in: fold value */
+ page_t* page); /* in: buffer page */
+/*****************************************************************
+Validates a hash table. */
+
+ibool
+ha_validate(
+/*========*/
+ /* out: TRUE if ok */
+ hash_table_t* table); /* in: hash table */
+/*****************************************************************
+Prints info of a hash table. */
+
+void
+ha_print_info(
+/*==========*/
+ hash_table_t* table); /* in: hash table */
+
+
+#ifndef UNIV_NONINL
+#include "ha0ha.ic"
+#endif
+
+#endif
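
Usage sketch (editorial, not part of the patch): create an external-chain hash table, insert a (fold, data) pair under the protecting mutex, look the data up again, and delete it. The cell count, the fold value 777 and the latching level (SYNC_NO_ORDER_CHECK, the level also used for the debug latching check in fut0fut.ic above) are illustrative choices, not prescribed by the API.

#include "ha0ha.h"

static void
example_ha_usage(void* data)	/* in: data pointer, must not be NULL */
{
	hash_table_t*	table;
	void*		found;

	/* Not the btr_search table; >= 1000 cells; one mutex (a power
	of 2); the latching level is used only by the debug version. */
	table = ha_create(FALSE, 1000, 1, SYNC_NO_ORDER_CHECK);

	if (!ha_insert_for_fold_mutex(table, 777, data)) {

		return;		/* out of memory */
	}

	hash_mutex_enter(table, 777);

	found = ha_search_and_get_data(table, 777);
	ut_ad(found == data);

	ha_delete(table, 777, data);

	hash_mutex_exit(table, 777);
}
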
diff --git a/innobase/include/ha0ha.ic b/innobase/include/ha0ha.ic
new file mode 100644
index 00000000000..7b4c624c653
--- /dev/null
+++ b/innobase/include/ha0ha.ic
@@ -0,0 +1,280 @@
+/************************************************************************
+The hash table with external chains
+
+(c) 1994-1997 Innobase Oy
+
+Created 8/18/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "ut0rnd.h"
+#include "mem0mem.h"
+
+/* The hash table external chain node */
+
+typedef struct ha_node_struct ha_node_t;
+
+struct ha_node_struct {
+ ha_node_t* next; /* next chain node or NULL if none */
+ void* data; /* pointer to the data */
+ ulint fold; /* fold value for the data */
+};
+
+/***************************************************************
+Deletes a hash node. */
+
+void
+ha_delete_hash_node(
+/*================*/
+ hash_table_t* table, /* in: hash table */
+ ha_node_t* del_node); /* in: node to be deleted */
+
+/**********************************************************************
+Gets a hash node data. */
+UNIV_INLINE
+void*
+ha_node_get_data(
+/*=============*/
+ /* out: pointer to the data */
+ ha_node_t* node) /* in: hash chain node */
+{
+ return(node->data);
+}
+
+/**********************************************************************
+Sets hash node data. */
+UNIV_INLINE
+void
+ha_node_set_data(
+/*=============*/
+ ha_node_t* node, /* in: hash chain node */
+ void* data) /* in: pointer to the data */
+{
+ node->data = data;
+}
+
+/**********************************************************************
+Gets the next node in a hash chain. */
+UNIV_INLINE
+ha_node_t*
+ha_chain_get_next(
+/*==============*/
+ /* out: next node, NULL if none */
+ hash_table_t* table, /* in: hash table */
+ ha_node_t* node) /* in: hash chain node */
+{
+ ut_ad(table);
+
+ return(node->next);
+}
+
+/**********************************************************************
+Gets the first node in a hash chain. */
+UNIV_INLINE
+ha_node_t*
+ha_chain_get_first(
+/*===============*/
+ /* out: first node, NULL if none */
+ hash_table_t* table, /* in: hash table */
+ ulint fold) /* in: fold value determining the chain */
+{
+ return(hash_get_nth_cell(table, hash_calc_hash(fold, table))->node);
+}
+
+/*****************************************************************
+Looks for an element in a hash table. */
+UNIV_INLINE
+ha_node_t*
+ha_search(
+/*======*/
+ /* out: pointer to the first hash table node
+ in chain having the fold number, NULL if not
+ found */
+ hash_table_t* table, /* in: hash table */
+ ulint fold) /* in: folded value of the searched data */
+{
+ ha_node_t* node;
+
+ ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
+
+ node = ha_chain_get_first(table, fold);
+
+ while (node) {
+ if (node->fold == fold) {
+
+ return(node);
+ }
+
+ node = ha_chain_get_next(table, node);
+ }
+
+ return(NULL);
+}
+
+/*****************************************************************
+Looks for an element in a hash table. */
+UNIV_INLINE
+void*
+ha_search_and_get_data(
+/*===================*/
+ /* out: pointer to the data of the first hash
+ table node in chain having the fold number,
+ NULL if not found */
+ hash_table_t* table, /* in: hash table */
+ ulint fold) /* in: folded value of the searched data */
+{
+ ha_node_t* node;
+
+ ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
+
+ node = ha_chain_get_first(table, fold);
+
+ while (node) {
+ if (node->fold == fold) {
+
+ return(node->data);
+ }
+
+ node = ha_chain_get_next(table, node);
+ }
+
+ return(NULL);
+}
+
+/*****************************************************************
+Returns the next matching hash table node in chain. */
+UNIV_INLINE
+ha_node_t*
+ha_next(
+/*====*/
+ /* out: pointer to the next hash table node
+ in chain with the fold value, NULL if not
+ found */
+ hash_table_t* table, /* in: hash table */
+ ha_node_t* node) /* in: hash table node */
+{
+ ulint fold;
+
+ fold = node->fold;
+
+ ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
+
+ node = ha_chain_get_next(table, node);
+
+ while (node) {
+ if (node->fold == fold) {
+
+ return(node);
+ }
+
+ node = ha_chain_get_next(table, node);
+ }
+
+ return(NULL);
+}
+
+/*************************************************************
+Looks for an element when we know the pointer to the data. */
+UNIV_INLINE
+ha_node_t*
+ha_search_with_data(
+/*================*/
+ /* out: pointer to the hash table node, NULL
+ if not found in the table */
+ hash_table_t* table, /* in: hash table */
+ ulint fold, /* in: folded value of the searched data */
+ void* data) /* in: pointer to the data */
+{
+ ha_node_t* node;
+
+ ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
+
+ node = ha_chain_get_first(table, fold);
+
+ while (node) {
+ if (node->data == data) {
+
+ return(node);
+ }
+
+ node = ha_chain_get_next(table, node);
+ }
+
+ return(NULL);
+}
+
+/*************************************************************
+Looks for an element when we know the pointer to the data, and updates
+the pointer to data, if found. */
+UNIV_INLINE
+void
+ha_search_and_update_if_found(
+/*==========================*/
+ hash_table_t* table, /* in: hash table */
+ ulint fold, /* in: folded value of the searched data */
+ void* data, /* in: pointer to the data */
+ void* new_data)/* in: new pointer to the data */
+{
+ ha_node_t* node;
+
+ ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
+
+ node = ha_search_with_data(table, fold, data);
+
+ if (node) {
+ node->data = new_data;
+ }
+}
+
+/*************************************************************
+Looks for an element when we know the pointer to the data, and deletes
+it from the hash table, if found. */
+UNIV_INLINE
+ibool
+ha_search_and_delete_if_found(
+/*==========================*/
+ /* out: TRUE if found */
+ hash_table_t* table, /* in: hash table */
+ ulint fold, /* in: folded value of the searched data */
+ void* data) /* in: pointer to the data */
+{
+ ha_node_t* node;
+
+ ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
+
+ node = ha_search_with_data(table, fold, data);
+
+ if (node) {
+ ha_delete_hash_node(table, node);
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/*****************************************************************
+Reserves the necessary hash table mutex and inserts an entry into the hash
+table. */
+UNIV_INLINE
+ibool
+ha_insert_for_fold_mutex(
+/*=====================*/
+ /* out: TRUE if succeed, FALSE if no more
+ memory could be allocated */
+ hash_table_t* table, /* in: hash table */
+ ulint fold, /* in: folded value of data; if a node with
+ the same fold value already exists, it is
+ updated to point to the same data, and no new
+ node is created! */
+ void* data) /* in: data, must not be NULL */
+{
+ ibool ret;
+
+ hash_mutex_enter(table, fold);
+
+ ret = ha_insert_for_fold(table, fold, data);
+
+ hash_mutex_exit(table, fold);
+
+ return(ret);
+}
diff --git a/innobase/include/hash0hash.h b/innobase/include/hash0hash.h
new file mode 100644
index 00000000000..378925a5bea
--- /dev/null
+++ b/innobase/include/hash0hash.h
@@ -0,0 +1,345 @@
+/******************************************************
+The simple hash table utility
+
+(c) 1997 Innobase Oy
+
+Created 5/20/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef hash0hash_h
+#define hash0hash_h
+
+#include "univ.i"
+#include "mem0mem.h"
+#include "sync0sync.h"
+
+typedef struct hash_table_struct hash_table_t;
+typedef struct hash_cell_struct hash_cell_t;
+
+typedef void* hash_node_t;
+
+/*****************************************************************
+Creates a hash table with >= n array cells. The actual number
+of cells is chosen to be a prime number slightly bigger than n. */
+
+hash_table_t*
+hash_create(
+/*========*/
+ /* out, own: created table */
+ ulint n); /* in: number of array cells */
+/*****************************************************************
+Creates a mutex array to protect a hash table. */
+
+void
+hash_create_mutexes(
+/*================*/
+ hash_table_t* table, /* in: hash table */
+ ulint n_mutexes, /* in: number of mutexes */
+ ulint sync_level); /* in: latching order level of the
+ mutexes: used in the debug version */
+/*****************************************************************
+Frees a hash table. */
+
+void
+hash_table_free(
+/*============*/
+ hash_table_t* table); /* in, own: hash table */
+/******************************************************************
+Calculates the hash value from a folded value. */
+UNIV_INLINE
+ulint
+hash_calc_hash(
+/*===========*/
+ /* out: hashed value */
+ ulint fold, /* in: folded value */
+ hash_table_t* table); /* in: hash table */
+/***********************************************************************
+Inserts a struct to a hash table. */
+
+#define HASH_INSERT(TYPE, NAME, TABLE, FOLD, DATA)\
+{\
+ hash_cell_t* cell3333;\
+ TYPE* struct3333;\
+\
+ ut_ad(!(TABLE)->mutexes || mutex_own(hash_get_mutex(TABLE, FOLD)));\
+\
+ (DATA)->NAME = NULL;\
+\
+ cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\
+\
+ if (cell3333->node == NULL) {\
+ cell3333->node = DATA;\
+ } else {\
+ struct3333 = cell3333->node;\
+\
+ while (struct3333->NAME != NULL) {\
+\
+ struct3333 = struct3333->NAME;\
+ }\
+\
+ struct3333->NAME = DATA;\
+ }\
+}
+
+/***********************************************************************
+Deletes a struct from a hash table. */
+
+#define HASH_DELETE(TYPE, NAME, TABLE, FOLD, DATA)\
+{\
+ hash_cell_t* cell3333;\
+ TYPE* struct3333;\
+\
+ ut_ad(!(TABLE)->mutexes || mutex_own(hash_get_mutex(TABLE, FOLD)));\
+\
+ cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\
+\
+ if (cell3333->node == DATA) {\
+ cell3333->node = DATA->NAME;\
+ } else {\
+ struct3333 = cell3333->node;\
+\
+ while (struct3333->NAME != DATA) {\
+\
+			ut_ad(struct3333);\
+ struct3333 = struct3333->NAME;\
+ }\
+\
+ struct3333->NAME = DATA->NAME;\
+ }\
+}
+
+/***********************************************************************
+Gets the first struct in a hash chain, NULL if none. */
+
+#define HASH_GET_FIRST(TABLE, HASH_VAL)\
+ (hash_get_nth_cell(TABLE, HASH_VAL)->node)
+
+/***********************************************************************
+Gets the next struct in a hash chain, NULL if none. */
+
+#define HASH_GET_NEXT(NAME, DATA) ((DATA)->NAME)
+
+/************************************************************************
+Looks for a struct in a hash table. */
+#define HASH_SEARCH(NAME, TABLE, FOLD, DATA, TEST)\
+{\
+\
+ ut_ad(!(TABLE)->mutexes || mutex_own(hash_get_mutex(TABLE, FOLD)));\
+\
+ (DATA) = HASH_GET_FIRST(TABLE, hash_calc_hash(FOLD, TABLE));\
+\
+ while ((DATA) != NULL) {\
+ if (TEST) {\
+ break;\
+ } else {\
+ (DATA) = HASH_GET_NEXT(NAME, DATA);\
+ }\
+ }\
+}
+
+/****************************************************************
+Gets the nth cell in a hash table. */
+UNIV_INLINE
+hash_cell_t*
+hash_get_nth_cell(
+/*==============*/
+ /* out: pointer to cell */
+ hash_table_t* table, /* in: hash table */
+ ulint n); /* in: cell index */
+/*****************************************************************
+Returns the number of cells in a hash table. */
+UNIV_INLINE
+ulint
+hash_get_n_cells(
+/*=============*/
+ /* out: number of cells */
+ hash_table_t* table); /* in: table */
+/***********************************************************************
+Deletes a struct which is stored in the heap of the hash table, and compacts
+the heap. The fold value must be stored in the struct NODE in a field named
+'fold'. */
+
+#define HASH_DELETE_AND_COMPACT(TYPE, NAME, TABLE, NODE)\
+{\
+ TYPE* node111;\
+ TYPE* top_node111;\
+ hash_cell_t* cell111;\
+ ulint fold111;\
+\
+ fold111 = (NODE)->fold;\
+\
+ HASH_DELETE(TYPE, NAME, TABLE, fold111, NODE);\
+\
+ top_node111 = (TYPE*)mem_heap_get_top(\
+ hash_get_heap(TABLE, fold111),\
+ sizeof(TYPE));\
+\
+ /* If the node to remove is not the top node in the heap, compact the\
+ heap of nodes by moving the top node in the place of NODE. */\
+\
+ if (NODE != top_node111) {\
+\
+ /* Copy the top node in place of NODE */\
+\
+ *(NODE) = *top_node111;\
+\
+ cell111 = hash_get_nth_cell(TABLE,\
+ hash_calc_hash(top_node111->fold, TABLE));\
+\
+ /* Look for the pointer to the top node, to update it */\
+\
+ if (cell111->node == top_node111) {\
+ /* The top node is the first in the chain */\
+\
+ cell111->node = NODE;\
+ } else {\
+ /* We have to look for the predecessor of the top\
+ node */\
+ node111 = cell111->node;\
+\
+ while (top_node111 != HASH_GET_NEXT(NAME, node111)) {\
+\
+ node111 = HASH_GET_NEXT(NAME, node111);\
+ }\
+\
+ /* Now we have the predecessor node */\
+\
+ node111->NAME = NODE;\
+ }\
+ }\
+\
+ /* Free the space occupied by the top node */\
+\
+ mem_heap_free_top(hash_get_heap(TABLE, fold111), sizeof(TYPE));\
+}
+
+/***********************************************************************
+Calculates the number of stored structs in a hash table. */
+
+#define HASH_GET_N_NODES(TYPE, NAME, TABLE, N)\
+{\
+ hash_cell_t* cell3333;\
+ TYPE* struct3333;\
+ ulint i3333;\
+\
+ (N) = 0;\
+\
+ for (i3333 = 0; i3333 < hash_get_n_cells(TABLE); i3333++) {\
+\
+ cell3333 = hash_get_nth_cell(TABLE, i3333);\
+\
+ struct3333 = cell3333->node;\
+\
+ while (struct3333) {\
+\
+ (N) = (N) + 1;\
+\
+			struct3333 = HASH_GET_NEXT(NAME, struct3333);\
+ }\
+ }\
+}
+
+/****************************************************************
+Gets the mutex index for a fold value in a hash table. */
+UNIV_INLINE
+ulint
+hash_get_mutex_no(
+/*==============*/
+ /* out: mutex number */
+ hash_table_t* table, /* in: hash table */
+ ulint fold); /* in: fold */
+/****************************************************************
+Gets the nth heap in a hash table. */
+UNIV_INLINE
+mem_heap_t*
+hash_get_nth_heap(
+/*==============*/
+ /* out: mem heap */
+ hash_table_t* table, /* in: hash table */
+ ulint i); /* in: index of the heap */
+/****************************************************************
+Gets the heap for a fold value in a hash table. */
+UNIV_INLINE
+mem_heap_t*
+hash_get_heap(
+/*==========*/
+ /* out: mem heap */
+ hash_table_t* table, /* in: hash table */
+ ulint fold); /* in: fold */
+/****************************************************************
+Gets the nth mutex in a hash table. */
+UNIV_INLINE
+mutex_t*
+hash_get_nth_mutex(
+/*===============*/
+ /* out: mutex */
+ hash_table_t* table, /* in: hash table */
+ ulint i); /* in: index of the mutex */
+/****************************************************************
+Gets the mutex for a fold value in a hash table. */
+UNIV_INLINE
+mutex_t*
+hash_get_mutex(
+/*===========*/
+ /* out: mutex */
+ hash_table_t* table, /* in: hash table */
+ ulint fold); /* in: fold */
+/****************************************************************
+Reserves the mutex for a fold value in a hash table. */
+
+void
+hash_mutex_enter(
+/*=============*/
+ hash_table_t* table, /* in: hash table */
+ ulint fold); /* in: fold */
+/****************************************************************
+Releases the mutex for a fold value in a hash table. */
+
+void
+hash_mutex_exit(
+/*============*/
+ hash_table_t* table, /* in: hash table */
+ ulint fold); /* in: fold */
+/****************************************************************
+Reserves all the mutexes of a hash table, in an ascending order. */
+
+void
+hash_mutex_enter_all(
+/*=================*/
+ hash_table_t* table); /* in: hash table */
+/****************************************************************
+Releases all the mutexes of a hash table. */
+
+void
+hash_mutex_exit_all(
+/*================*/
+ hash_table_t* table); /* in: hash table */
+
+
+struct hash_cell_struct{
+ void* node; /* hash chain node, NULL if none */
+};
+
+/* The hash table structure */
+struct hash_table_struct {
+ ulint n_cells;/* number of cells in the hash table */
+ hash_cell_t* array; /* pointer to cell array */
+ ulint n_mutexes;/* if mutexes != NULL, then the number of
+ mutexes, must be a power of 2 */
+ mutex_t* mutexes;/* NULL, or an array of mutexes used to
+ protect segments of the hash table */
+ mem_heap_t** heaps; /* if this is non-NULL, hash chain nodes for
+ external chaining can be allocated from these
+ memory heaps; there are then n_mutexes many of
+ these heaps */
+	mem_heap_t*	heap;	/* if this is non-NULL, hash chain nodes
+				are allocated from this single heap */
+	ulint		magic_n;/* magic number; see HASH_TABLE_MAGIC_N
+				below */
+};
+
+#define HASH_TABLE_MAGIC_N 76561114
+
+#ifndef UNIV_NONINL
+#include "hash0hash.ic"
+#endif
+
+#endif
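
Usage sketch (editorial, not part of the patch): the chain macros are intrusive, so the stored struct must carry its own fold value and a chain pointer whose field name is passed to the macros as NAME. The example struct, its fields and the cell count are hypothetical; no mutexes are created here, so the mutex assertions in the macros are vacuous.

#include "hash0hash.h"

typedef struct example_struct	example_t;
struct example_struct {
	ulint		id;	/* key from which 'fold' was computed */
	ulint		fold;	/* folded value of the key */
	example_t*	hash;	/* hash chain link; passed as NAME below */
};

static void
example_hash_usage(example_t* item)
{
	hash_table_t*	table;
	example_t*	found;

	table = hash_create(1000);

	HASH_INSERT(example_t, hash, table, item->fold, item);

	/* HASH_SEARCH assigns to 'found' the first struct on the chain
	for which TEST holds, or NULL if there is none. */
	HASH_SEARCH(hash, table, item->fold, found, found->id == item->id);
	ut_ad(found == item);

	HASH_DELETE(example_t, hash, table, item->fold, item);

	hash_table_free(table);
}
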
diff --git a/innobase/include/hash0hash.ic b/innobase/include/hash0hash.ic
new file mode 100644
index 00000000000..3ed2f9088dd
--- /dev/null
+++ b/innobase/include/hash0hash.ic
@@ -0,0 +1,131 @@
+/******************************************************
+The simple hash table utility
+
+(c) 1997 Innobase Oy
+
+Created 5/20/1997 Heikki Tuuri
+*******************************************************/
+
+#include "ut0rnd.h"
+
+/****************************************************************
+Gets the nth cell in a hash table. */
+UNIV_INLINE
+hash_cell_t*
+hash_get_nth_cell(
+/*==============*/
+ /* out: pointer to cell */
+ hash_table_t* table, /* in: hash table */
+ ulint n) /* in: cell index */
+{
+ ut_ad(n >= 0);
+ ut_ad(n < table->n_cells);
+
+ return(table->array + n);
+}
+
+/*****************************************************************
+Returns the number of cells in a hash table. */
+UNIV_INLINE
+ulint
+hash_get_n_cells(
+/*=============*/
+ /* out: number of cells */
+ hash_table_t* table) /* in: table */
+{
+ return(table->n_cells);
+}
+
+/******************************************************************
+Calculates the hash value from a folded value. */
+UNIV_INLINE
+ulint
+hash_calc_hash(
+/*===========*/
+ /* out: hashed value */
+ ulint fold, /* in: folded value */
+ hash_table_t* table) /* in: hash table */
+{
+ return(ut_hash_ulint(fold, table->n_cells));
+}
+
+/****************************************************************
+Gets the mutex index for a fold value in a hash table. */
+UNIV_INLINE
+ulint
+hash_get_mutex_no(
+/*==============*/
+ /* out: mutex number */
+ hash_table_t* table, /* in: hash table */
+ ulint fold) /* in: fold */
+{
+ return(ut_2pow_remainder(fold, table->n_mutexes));
+}
+
+/****************************************************************
+Gets the nth heap in a hash table. */
+UNIV_INLINE
+mem_heap_t*
+hash_get_nth_heap(
+/*==============*/
+ /* out: mem heap */
+ hash_table_t* table, /* in: hash table */
+ ulint i) /* in: index of the heap */
+{
+ ut_ad(i < table->n_mutexes);
+
+ return(table->heaps[i]);
+}
+
+/****************************************************************
+Gets the heap for a fold value in a hash table. */
+UNIV_INLINE
+mem_heap_t*
+hash_get_heap(
+/*==========*/
+ /* out: mem heap */
+ hash_table_t* table, /* in: hash table */
+ ulint fold) /* in: fold */
+{
+ ulint i;
+
+ if (table->heap) {
+ return(table->heap);
+ }
+
+ i = hash_get_mutex_no(table, fold);
+
+ return(hash_get_nth_heap(table, i));
+}
+
+/****************************************************************
+Gets the nth mutex in a hash table. */
+UNIV_INLINE
+mutex_t*
+hash_get_nth_mutex(
+/*===============*/
+ /* out: mutex */
+ hash_table_t* table, /* in: hash table */
+ ulint i) /* in: index of the mutex */
+{
+ ut_ad(i < table->n_mutexes);
+
+ return(table->mutexes + i);
+}
+
+/****************************************************************
+Gets the mutex for a fold value in a hash table. */
+UNIV_INLINE
+mutex_t*
+hash_get_mutex(
+/*===========*/
+ /* out: mutex */
+ hash_table_t* table, /* in: hash table */
+ ulint fold) /* in: fold */
+{
+ ulint i;
+
+ i = hash_get_mutex_no(table, fold);
+
+ return(hash_get_nth_mutex(table, i));
+}
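
Worked example (editorial note): since n_mutexes must be a power of 2, hash_get_mutex_no() via ut_2pow_remainder() is effectively fold & (n_mutexes - 1); e.g. with n_mutexes = 4, a fold value of 777 maps to mutex number 777 mod 4 = 1, and hash_get_heap() picks the corresponding heap the same way when per-mutex heaps are used.
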
diff --git a/innobase/include/ib_odbc.h b/innobase/include/ib_odbc.h
new file mode 100644
index 00000000000..86884b41d39
--- /dev/null
+++ b/innobase/include/ib_odbc.h
@@ -0,0 +1,149 @@
+/******************************************************
+Innobase ODBC client library header; this is equivalent to
+the standard sql.h ODBC header file
+
+(c) 1998 Innobase Oy
+
+Created 2/22/1998 Heikki Tuuri
+*******************************************************/
+
+#ifndef ib_odbc_h
+#define ib_odbc_h
+
+typedef unsigned char UCHAR;
+typedef signed char SCHAR;
+typedef long int SDWORD;
+typedef short int SWORD;
+typedef unsigned long int UDWORD;
+typedef unsigned short int UWORD;
+
+typedef void* PTR;
+
+typedef void* HENV;
+typedef void* HDBC;
+typedef void* HSTMT;
+
+typedef signed short RETCODE;
+
+/* RETCODEs */
+#define SQL_NO_DATA_FOUND (-3)
+#define SQL_INVALID_HANDLE (-2)
+#define SQL_ERROR (-1)
+#define SQL_SUCCESS 0
+
+/* Standard SQL datatypes, using ANSI type numbering */
+#define SQL_CHAR 1
+#define SQL_INTEGER 4
+#define SQL_VARCHAR 12
+
+/* C datatype to SQL datatype mapping */
+#define SQL_C_CHAR SQL_CHAR
+#define SQL_C_LONG SQL_INTEGER
+
+/* Special length value */
+#define SQL_NULL_DATA (-1)
+
+#define SQL_PARAM_INPUT 1
+#define SQL_PARAM_OUTPUT 4
+
+/* Null handles */
+#define SQL_NULL_HENV NULL
+#define SQL_NULL_HDBC NULL
+#define SQL_NULL_HSTM NULL
+
+
+/**************************************************************************
+Allocates an SQL environment. */
+
+RETCODE
+SQLAllocEnv(
+/*========*/
+ /* out: SQL_SUCCESS */
+ HENV* phenv); /* out: pointer to an environment handle */
+/**************************************************************************
+Allocates an SQL connection. */
+
+RETCODE
+SQLAllocConnect(
+/*============*/
+ /* out: SQL_SUCCESS */
+ HENV henv, /* in: pointer to an environment handle */
+ HDBC* phdbc); /* out: pointer to a connection handle */
+/**************************************************************************
+Allocates an SQL statement. */
+
+RETCODE
+SQLAllocStmt(
+/*=========*/
+ HDBC hdbc, /* in: SQL connection */
+ HSTMT* phstmt); /* out: pointer to a statement handle */
+/**************************************************************************
+Connects to a database server process (establishes a connection and a
+session). */
+
+RETCODE
+SQLConnect(
+/*=======*/
+ /* out: SQL_SUCCESS */
+ HDBC hdbc, /* in: SQL connection handle */
+ UCHAR* szDSN, /* in: data source name (server name) */
+ SWORD cbDSN, /* in: data source name length */
+ UCHAR* szUID, /* in: user name */
+ SWORD cbUID, /* in: user name length */
+ UCHAR* szAuthStr, /* in: password */
+ SWORD cbAuthStr); /* in: password length */
+/**************************************************************************
+Makes the server parse and optimize an SQL string. */
+
+RETCODE
+SQLPrepare(
+/*=======*/
+ /* out: SQL_SUCCESS */
+ HSTMT hstmt, /* in: statement handle */
+ UCHAR* szSqlStr, /* in: SQL string */
+ SDWORD cbSqlStr); /* in: SQL string length */
+/**************************************************************************
+Binds a parameter in a prepared statement. */
+
+RETCODE
+SQLBindParameter(
+/*=============*/
+ /* out: SQL_SUCCESS */
+ HSTMT hstmt, /* in: statement handle */
+ UWORD ipar, /* in: parameter index, starting from 1 */
+ SWORD fParamType, /* in: SQL_PARAM_INPUT or SQL_PARAM_OUTPUT */
+ SWORD fCType, /* in: SQL_C_CHAR, ... */
+ SWORD fSqlType, /* in: SQL_CHAR, ... */
+ UDWORD cbColDef, /* in: precision: ignored */
+ SWORD ibScale, /* in: scale: ignored */
+ PTR rgbValue, /* in: pointer to a buffer for the data */
+ SDWORD cbValueMax, /* in: buffer size */
+ SDWORD* pcbValue); /* in: pointer to a buffer for the data
+ length or SQL_NULL_DATA */
+/**************************************************************************
+Executes a prepared statement where all parameters have been bound. */
+
+RETCODE
+SQLExecute(
+/*=======*/
+ /* out: SQL_SUCCESS or SQL_ERROR */
+ HSTMT hstmt); /* in: statement handle */
+/**************************************************************************
+Queries an error message. */
+
+RETCODE
+SQLError(
+/*=====*/
+ /* out: SQL_SUCCESS or SQL_NO_DATA_FOUND */
+ HENV henv, /* in: SQL_NULL_HENV */
+ HDBC hdbc, /* in: SQL_NULL_HDBC */
+ HSTMT hstmt, /* in: statement handle */
+ UCHAR* szSqlState, /* in/out: SQLSTATE as a null-terminated string,
+ (currently, always == "S1000") */
+ SDWORD* pfNativeError, /* out: native error code */
+ UCHAR* szErrorMsg, /* in/out: buffer for an error message as a
+ null-terminated string */
+ SWORD cbErrorMsgMax, /* in: buffer size for szErrorMsg */
+ SWORD* pcbErrorMsg); /* out: error message length */
+
+#endif
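
Usage sketch (editorial, not part of the patch) of the call sequence this header implies: allocate the handles, connect, prepare a statement with one bound input parameter, execute, and fetch the error text on failure. The data source name, credentials and SQL text are placeholders.

#include "ib_odbc.h"

static RETCODE
example_odbc_insert(void)
{
	HENV	henv;
	HDBC	hdbc;
	HSTMT	hstmt;
	SDWORD	id	= 42;
	SDWORD	id_len	= sizeof(id);
	UCHAR	state[6];		/* SQLSTATE, e.g. "S1000" */
	SDWORD	native_err;
	UCHAR	msg[256];
	SWORD	msg_len;
	RETCODE	rc;

	SQLAllocEnv(&henv);
	SQLAllocConnect(henv, &hdbc);
	SQLConnect(hdbc, (UCHAR*)"testdb", 6, (UCHAR*)"user", 4,
						(UCHAR*)"secret", 6);
	SQLAllocStmt(hdbc, &hstmt);

	SQLPrepare(hstmt, (UCHAR*)"INSERT INTO T VALUES (?)", 24);
	SQLBindParameter(hstmt, 1, SQL_PARAM_INPUT, SQL_C_LONG, SQL_INTEGER,
				0, 0, &id, sizeof(id), &id_len);
	rc = SQLExecute(hstmt);

	if (rc == SQL_ERROR) {
		SQLError(SQL_NULL_HENV, SQL_NULL_HDBC, hstmt,
			state, &native_err, msg, sizeof(msg), &msg_len);
	}

	return(rc);
}
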
diff --git a/innobase/include/ibuf0ibuf.h b/innobase/include/ibuf0ibuf.h
new file mode 100644
index 00000000000..f0b333192de
--- /dev/null
+++ b/innobase/include/ibuf0ibuf.h
@@ -0,0 +1,268 @@
+/******************************************************
+Insert buffer
+
+(c) 1997 Innobase Oy
+
+Created 7/19/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef ibuf0ibuf_h
+#define ibuf0ibuf_h
+
+#include "univ.i"
+
+#include "dict0mem.h"
+#include "dict0dict.h"
+#include "mtr0mtr.h"
+#include "que0types.h"
+#include "ibuf0types.h"
+#include "fsp0fsp.h"
+
+extern ibuf_t* ibuf;
+
+/**********************************************************************
+Creates the insert buffer data struct for a single tablespace. Reads the
+root page of the insert buffer tree in the tablespace. This function can
+be called only after the dictionary system has been initialized, as it also
+creates the insert buffer table and index for this tablespace. */
+
+ibuf_data_t*
+ibuf_data_init_for_space(
+/*=====================*/
+ /* out, own: ibuf data struct, linked to the list
+ in ibuf control structure. */
+ ulint space); /* in: space id */
+/**********************************************************************
+Creates the insert buffer data structure at a database startup and
+initializes the data structures for the insert buffer of each tablespace. */
+
+void
+ibuf_init_at_db_start(void);
+/*=======================*/
+/*************************************************************************
+Initializes an ibuf bitmap page. */
+
+void
+ibuf_bitmap_page_init(
+/*==================*/
+ page_t* page, /* in: bitmap page */
+ mtr_t* mtr); /* in: mtr */
+/****************************************************************************
+Resets the free bits of the page in the ibuf bitmap. This is done in a
+separate mini-transaction, hence this operation does not restrict further
+work to only ibuf bitmap operations, which would result if the latch to the
+bitmap page were kept. */
+
+void
+ibuf_reset_free_bits_with_type(
+/*===========================*/
+ ulint type, /* in: index type */
+ page_t* page); /* in: index page; free bits are set to 0 if the index
+ is non-clustered and non-unique and the page level is
+ 0 */
+/****************************************************************************
+Resets the free bits of the page in the ibuf bitmap. This is done in a
+separate mini-transaction, hence this operation does not restrict further
+work to solely ibuf bitmap operations, which would result if the latch to
+the bitmap page were kept. */
+
+void
+ibuf_reset_free_bits(
+/*=================*/
+ dict_index_t* index, /* in: index */
+ page_t* page); /* in: index page; free bits are set to 0 if
+ the index is non-clustered and non-unique and
+ the page level is 0 */
+/****************************************************************************
+Updates the free bits of the page in the ibuf bitmap if there is not enough
+free space on the page any more. This is done in a separate mini-transaction,
+hence this operation does not restrict further work to only ibuf bitmap
+operations, which would result if the latch to the bitmap page were kept. */
+UNIV_INLINE
+void
+ibuf_update_free_bits_if_full(
+/*==========================*/
+ dict_index_t* index, /* in: index */
+ page_t* page, /* in: index page to which we have added new
+ records; the free bits are updated if the
+ index is non-clustered and non-unique and
+ the page level is 0, and the page becomes
+ fuller */
+ ulint max_ins_size,/* in: value of maximum insert size with
+ reorganize before the latest operation
+ performed to the page */
+ ulint increase);/* in: upper limit for the additional space
+ used in the latest operation, if known, or
+ ULINT_UNDEFINED */
+/**************************************************************************
+Updates the free bits for the page to reflect the present state. Does this
+in the mtr given, which means that the latching order rules virtually
+prevent any further operations for this OS thread until mtr is committed. */
+
+void
+ibuf_update_free_bits_low(
+/*======================*/
+ dict_index_t* index, /* in: index */
+ page_t* page, /* in: index page */
+ ulint max_ins_size, /* in: value of maximum insert size
+ with reorganize before the latest
+ operation performed to the page */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************************
+Updates the free bits for the two pages to reflect the present state. Does
+this in the mtr given, which means that the latching order rules virtually
+prevent any further operations until mtr is committed. */
+
+void
+ibuf_update_free_bits_for_two_pages_low(
+/*====================================*/
+ dict_index_t* index, /* in: index */
+ page_t* page1, /* in: index page */
+ page_t* page2, /* in: index page */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************************
+A basic partial test of whether an insert to the insert buffer could be
+possible and recommended. */
+UNIV_INLINE
+ibool
+ibuf_should_try(
+/*============*/
+ dict_index_t* index); /* in: index where to insert */
+/**********************************************************************
+Returns TRUE if the current OS thread is performing an insert buffer
+routine. */
+
+ibool
+ibuf_inside(void);
+/*=============*/
+ /* out: TRUE if inside an insert buffer routine: for instance,
+ a read-ahead of non-ibuf pages is then forbidden */
+/***************************************************************************
+Checks if a page address is an ibuf bitmap page (level 3 page) address. */
+UNIV_INLINE
+ibool
+ibuf_bitmap_page(
+/*=============*/
+ /* out: TRUE if a bitmap page */
+ ulint page_no);/* in: page number */
+/***************************************************************************
+Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */
+
+ibool
+ibuf_page(
+/*======*/
+ /* out: TRUE if level 2 or level 3 page */
+ ulint space, /* in: space id */
+ ulint page_no);/* in: page number */
+/***************************************************************************
+Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */
+
+ibool
+ibuf_page_low(
+/*==========*/
+ /* out: TRUE if level 2 or level 3 page */
+ ulint space, /* in: space id */
+ ulint page_no,/* in: page number */
+ mtr_t* mtr); /* in: mtr which will contain an x-latch to the
+ bitmap page if the page is not one of the fixed
+ address ibuf pages */
+/*************************************************************************
+Checks if an index page has so much free space that the free bit should
+be set TRUE in the ibuf bitmap. */
+
+ibool
+ibuf_index_page_has_free(
+/*=====================*/
+ /* out: TRUE if there is enough free space */
+ page_t* page); /* in: non-unique secondary index page */
+/***************************************************************************
+Frees excess pages from the ibuf free list. This function is called when an OS
+thread calls fsp services to allocate a new file segment, or a new page to a
+file segment, and the thread did not own the fsp latch before this call. */
+
+void
+ibuf_free_excess_pages(
+/*===================*/
+ ulint space); /* in: space id */
+/*************************************************************************
+Makes an index insert to the insert buffer, instead of directly to the disk
+page, if this is possible. Does not do insert if the index is clustered
+or unique. */
+
+ibool
+ibuf_insert(
+/*========*/
+ /* out: TRUE if success */
+ dtuple_t* entry, /* in: index entry to insert */
+ dict_index_t* index, /* in: index where to insert */
+ ulint space, /* in: space id where to insert */
+ ulint page_no,/* in: page number where to insert */
+ que_thr_t* thr); /* in: query thread */
+/*************************************************************************
+When an index page is read from a disk to the buffer pool, this function
+inserts to the page the possible index entries buffered in the insert buffer.
+The entries are deleted from the insert buffer. If the page is not read, but
+created in the buffer pool, this function deletes its buffered entries from
+the insert buffer; note that there can exist entries if the page belonged to
+an index which was dropped. */
+
+void
+ibuf_merge_or_delete_for_page(
+/*==========================*/
+ page_t* page, /* in: if page has been read from disk, pointer to
+ the page x-latched, else NULL */
+ ulint space, /* in: space id of the index page */
+ ulint page_no);/* in: page number of the index page */
+/*************************************************************************
+Contracts insert buffer trees by reading pages to the buffer pool. */
+
+ulint
+ibuf_contract(
+/*==========*/
+ /* out: a lower limit for the combined size in bytes
+ of entries which will be merged from ibuf trees to the
+ pages read, 0 if ibuf is empty */
+ ibool sync); /* in: TRUE if the caller wants to wait for the
+ issued read with the highest tablespace address
+ to complete */
+/*************************************************************************
+Parses a redo log record of an ibuf bitmap page init. */
+
+byte*
+ibuf_parse_bitmap_init(
+/*===================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
+/**********************************************************************
+Gets the ibuf count for a given page. */
+
+ulint
+ibuf_count_get(
+/*===========*/
+ /* out: number of entries in the insert buffer
+ currently buffered for this page */
+ ulint space, /* in: space id */
+ ulint page_no);/* in: page number */
+/**********************************************************************
+Prints info of ibuf. */
+
+void
+ibuf_print(void);
+/*============*/
+
+#define IBUF_HEADER_PAGE_NO FSP_IBUF_HEADER_PAGE_NO
+#define IBUF_TREE_ROOT_PAGE_NO FSP_IBUF_TREE_ROOT_PAGE_NO
+
+/* The ibuf header page currently contains only the file segment header
+for the file segment from which the pages for the ibuf tree are allocated */
+#define IBUF_HEADER PAGE_DATA
+#define IBUF_TREE_SEG_HEADER 0 /* fseg header for ibuf tree */
+
+#ifndef UNIV_NONINL
+#include "ibuf0ibuf.ic"
+#endif
+
+#endif
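
Sketch (editorial, not part of the patch) of how a caller on the secondary-index insert path might consult these routines: ibuf_should_try() is the cheap heuristic check and ibuf_insert() attempts the buffered insert. The wrapping function is hypothetical; on FALSE the caller would read the leaf page and insert directly.

#include "ibuf0ibuf.h"

static ibool
example_try_buffered_insert(
	dtuple_t*	entry,		/* in: index entry to insert */
	dict_index_t*	index,		/* in: index where to insert */
	ulint		space,		/* in: space id */
	ulint		page_no,	/* in: leaf page number */
	que_thr_t*	thr)		/* in: query thread */
{
	if (!ibuf_should_try(index)) {

		return(FALSE);
	}

	/* May still return FALSE when the insert cannot be buffered;
	the caller then inserts directly into the page. */
	return(ibuf_insert(entry, index, space, page_no, thr));
}
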
diff --git a/innobase/include/ibuf0ibuf.ic b/innobase/include/ibuf0ibuf.ic
new file mode 100644
index 00000000000..e969a0550da
--- /dev/null
+++ b/innobase/include/ibuf0ibuf.ic
@@ -0,0 +1,226 @@
+/******************************************************
+Insert buffer
+
+(c) 1997 Innobase Oy
+
+Created 7/19/1997 Heikki Tuuri
+*******************************************************/
+
+#include "buf0lru.h"
+#include "page0page.h"
+
+extern ulint ibuf_flush_count;
+
+/* If this number is n, an index page must contain at least UNIV_PAGE_SIZE / n
+bytes of free space for ibuf to try to buffer inserts to this page.
+If there is this much of free space, the corresponding bits are set in the
+ibuf bitmap. */
+#define IBUF_PAGE_SIZE_PER_FREE_SPACE 32
+
+/* Insert buffer data struct for a single tablespace */
+struct ibuf_data_struct{
+ ulint space; /* space id */
+	ulint		seg_size;/* allocated pages of the file segment
+				containing the ibuf header and tree */
+ ulint size; /* size of the insert buffer tree in pages */
+ ibool empty; /* after an insert to the ibuf tree is
+ performed, this is set to FALSE, and if a
+ contract operation finds the tree empty, this
+ is set to TRUE */
+ ulint free_list_len;
+ /* length of the free list */
+ ulint height; /* tree height */
+ dict_index_t* index; /* insert buffer index */
+ UT_LIST_NODE_T(ibuf_data_t) data_list;
+ /* list of ibuf data structs */
+ ulint n_inserts;/* number of inserts made to the insert
+ buffer */
+ ulint n_merges;/* number of pages merged */
+ ulint n_merged_recs;/* number of records merged */
+};
+
+/* If the ibuf meter exceeds this value, then the suitable inserts are made to
+the insert buffer instead of directly to the disk page */
+#define IBUF_THRESHOLD 50
+
+struct ibuf_struct{
+ ulint size; /* current size of the ibuf index
+ trees in pages */
+ ulint max_size; /* recommended maximum size in pages
+ for the ibuf index tree */
+ ulint meter; /* heuristic meter which measures
+ desirability of doing inserts to the
+ insert buffer instead of directly to
+ the disk page */
+ UT_LIST_BASE_NODE_T(ibuf_data_t) data_list;
+ /* list of ibuf data structs for
+ each tablespace */
+};
+
+/****************************************************************************
+Sets the free bit of the page in the ibuf bitmap. This is done in a separate
+mini-transaction, hence this operation does not restrict further work to only
+ibuf bitmap operations, which would result if the latch to the bitmap page
+were kept. */
+
+void
+ibuf_set_free_bits(
+/*===============*/
+ ulint type, /* in: index type */
+	page_t*	page,	/* in: index page; free bit is reset if the index
+			is non-clustered and non-unique, and the page level
+			is 0 */
+ ulint val, /* in: value to set: < 4 */
+ ulint max_val);/* in: ULINT_UNDEFINED or a maximum value which
+ the bits must have before setting; this is for
+ debugging */
+
+/**************************************************************************
+A basic partial test of whether an insert to the insert buffer could be
+possible and recommended. */
+UNIV_INLINE
+ibool
+ibuf_should_try(
+/*============*/
+ dict_index_t* index) /* in: index where to insert */
+{
+ if (!(index->type & (DICT_CLUSTERED | DICT_UNIQUE))
+ && ibuf->meter > IBUF_THRESHOLD) {
+ ibuf_flush_count++;
+
+ if (ibuf_flush_count % 8 == 0) {
+
+ buf_LRU_try_free_flushed_blocks();
+ }
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/***************************************************************************
+Checks if a page address is an ibuf bitmap page address. */
+UNIV_INLINE
+ibool
+ibuf_bitmap_page(
+/*=============*/
+ /* out: TRUE if a bitmap page */
+ ulint page_no)/* in: page number */
+{
+ if (page_no % XDES_DESCRIBED_PER_PAGE == FSP_IBUF_BITMAP_OFFSET) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/*************************************************************************
+Translates the free space on a page to a value in the ibuf bitmap.*/
+UNIV_INLINE
+ulint
+ibuf_index_page_calc_free_bits(
+/*===========================*/
+ /* out: value for ibuf bitmap bits */
+ ulint max_ins_size) /* in: maximum insert size after reorganize
+ for the page */
+{
+ ulint n;
+
+ n = max_ins_size / (UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
+
+ if (n == 3) {
+ n = 2;
+ }
+
+ if (n > 3) {
+ n = 3;
+ }
+
+ return(n);
+}
+
+/*************************************************************************
+Translates the ibuf free bits to the free space on a page in bytes. */
+UNIV_INLINE
+ulint
+ibuf_index_page_calc_free_from_bits(
+/*================================*/
+ /* out: maximum insert size after reorganize for the
+ page */
+ ulint bits) /* in: value for ibuf bitmap bits */
+{
+ ut_ad(bits < 4);
+
+ if (bits == 3) {
+ return(4 * UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
+ }
+
+ return(bits * UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
+}
+
+/*************************************************************************
+Translates the free space on a page to a value in the ibuf bitmap.*/
+UNIV_INLINE
+ulint
+ibuf_index_page_calc_free(
+/*======================*/
+ /* out: value for ibuf bitmap bits */
+ page_t* page) /* in: non-unique secondary index page */
+{
+ return(ibuf_index_page_calc_free_bits(
+ page_get_max_insert_size_after_reorganize(page, 1)));
+}
+
+/****************************************************************************
+Updates the free bits of the page in the ibuf bitmap if there is not enough
+free space on the page any more. This is done in a separate mini-transaction,
+hence this operation does not restrict further work to only ibuf bitmap
+operations, which would result if the latch to the bitmap page were kept. */
+UNIV_INLINE
+void
+ibuf_update_free_bits_if_full(
+/*==========================*/
+ dict_index_t* index, /* in: index */
+ page_t* page, /* in: index page to which we have added new
+ records; the free bits are updated if the
+ index is non-clustered and non-unique and
+ the page level is 0, and the page becomes
+ fuller */
+ ulint max_ins_size,/* in: value of maximum insert size with
+ reorganize before the latest operation
+ performed to the page */
+ ulint increase)/* in: upper limit for the additional space
+ used in the latest operation, if known, or
+ ULINT_UNDEFINED */
+{
+ ulint before;
+ ulint after;
+
+ before = ibuf_index_page_calc_free_bits(max_ins_size);
+
+ if (max_ins_size >= increase) {
+ ut_ad(ULINT_UNDEFINED > UNIV_PAGE_SIZE);
+
+ after = ibuf_index_page_calc_free_bits(max_ins_size
+ - increase);
+#ifdef UNIV_IBUF_DEBUG
+ ut_a(after <= ibuf_index_page_calc_free(page));
+#endif
+ } else {
+ after = ibuf_index_page_calc_free(page);
+ }
+
+ if (after == 0) {
+ /* We move the page to front of the buffer pool LRU list:
+ the purpose of this is to prevent those pages to which we
+ cannot make inserts using the insert buffer from slipping
+ out of the buffer pool */
+
+ buf_page_make_young(page);
+ }
+
+ if (before > after) {
+ ibuf_set_free_bits(index->type, page, after, before);
+ }
+}
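
Worked example (editorial note): with IBUF_PAGE_SIZE_PER_FREE_SPACE = 32, one bitmap unit corresponds to UNIV_PAGE_SIZE / 32 bytes of free space, i.e. 512 bytes if the page size is the usual 16 kB (an assumption; UNIV_PAGE_SIZE is defined elsewhere). Then ibuf_index_page_calc_free_bits() maps a max_ins_size of 1400 to 1400 / 512 = 2, maps 1600 (quotient 3) down to 2, and clamps 2100 (quotient 4) to the maximum value 3; conversely, ibuf_index_page_calc_free_from_bits() maps 2 back to 1024 bytes and 3 to 4 * 512 = 2048 bytes.
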
diff --git a/innobase/include/ibuf0types.h b/innobase/include/ibuf0types.h
new file mode 100644
index 00000000000..fb202ac44b0
--- /dev/null
+++ b/innobase/include/ibuf0types.h
@@ -0,0 +1,15 @@
+/******************************************************
+Insert buffer global types
+
+(c) 1997 Innobase Oy
+
+Created 7/29/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef ibuf0types_h
+#define ibuf0types_h
+
+typedef struct ibuf_data_struct ibuf_data_t;
+typedef struct ibuf_struct ibuf_t;
+
+#endif
diff --git a/innobase/include/lock0lock.h b/innobase/include/lock0lock.h
new file mode 100644
index 00000000000..d2d4ce9290d
--- /dev/null
+++ b/innobase/include/lock0lock.h
@@ -0,0 +1,538 @@
+/******************************************************
+The transaction lock system
+
+(c) 1996 Innobase Oy
+
+Created 5/7/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef lock0lock_h
+#define lock0lock_h
+
+#include "univ.i"
+#include "trx0types.h"
+#include "rem0types.h"
+#include "dict0types.h"
+#include "que0types.h"
+#include "page0types.h"
+#include "lock0types.h"
+#include "read0types.h"
+#include "hash0hash.h"
+
+extern ibool lock_print_waits;
+
+/*****************************************************************
+Cancels a waiting record lock request and releases the waiting transaction
+that requested it. NOTE: does NOT check if waiting lock requests behind this
+one can now be granted! */
+
+void
+lock_rec_cancel(
+/*============*/
+ lock_t* lock); /* in: waiting record lock request */
+/*************************************************************************
+Creates the lock system at database start. */
+
+void
+lock_sys_create(
+/*============*/
+ ulint n_cells); /* in: number of slots in lock hash table */
+/*************************************************************************
+Checks if some transaction has an implicit x-lock on a record in a secondary
+index. */
+
+trx_t*
+lock_sec_rec_some_has_impl_off_kernel(
+/*==================================*/
+ /* out: transaction which has the x-lock, or
+ NULL */
+ rec_t* rec, /* in: user record */
+ dict_index_t* index); /* in: secondary index */
+/*************************************************************************
+Checks if some transaction has an implicit x-lock on a record in a clustered
+index. */
+UNIV_INLINE
+trx_t*
+lock_clust_rec_some_has_impl(
+/*=========================*/
+ /* out: transaction which has the x-lock, or
+ NULL */
+ rec_t* rec, /* in: user record */
+ dict_index_t* index); /* in: clustered index */
+/*****************************************************************
+Resets the lock bits for a single record. Releases transactions
+waiting for lock requests here. */
+
+void
+lock_rec_reset_and_release_wait(
+/*============================*/
+ rec_t* rec); /* in: record whose locks bits should be reset */
+/*****************************************************************
+Makes a record inherit the locks of another record as gap type
+locks, but does not reset the lock bits of the other record. Also
+waiting lock requests on rec are inherited as GRANTED gap locks. */
+
+void
+lock_rec_inherit_to_gap(
+/*====================*/
+ rec_t* heir, /* in: record which inherits */
+ rec_t* rec); /* in: record from which inherited; does NOT reset
+ the locks on this record */
+/*****************************************************************
+Updates the lock table when we have reorganized a page. NOTE: we copy
+also the locks set on the infimum of the page; the infimum may carry
+locks if an update of a record is occurring on the page, and its locks
+were temporarily stored on the infimum. */
+
+void
+lock_move_reorganize_page(
+/*======================*/
+ page_t* page, /* in: old index page */
+ page_t* new_page); /* in: reorganized page */
+/*****************************************************************
+Moves the explicit locks on user records to another page if a record
+list end is moved to another page. */
+
+void
+lock_move_rec_list_end(
+/*===================*/
+ page_t* new_page, /* in: index page to move to */
+ page_t* page, /* in: index page */
+ rec_t* rec); /* in: record on page: this is the
+ first record moved */
+/*****************************************************************
+Moves the explicit locks on user records to another page if a record
+list start is moved to another page. */
+
+void
+lock_move_rec_list_start(
+/*=====================*/
+ page_t* new_page, /* in: index page to move to */
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: record on page: this is the
+ first record NOT copied */
+ rec_t* old_end); /* in: old previous-to-last record on
+ new_page before the records were copied */
+/*****************************************************************
+Updates the lock table when a page is split to the right. */
+
+void
+lock_update_split_right(
+/*====================*/
+ page_t* right_page, /* in: right page */
+ page_t* left_page); /* in: left page */
+/*****************************************************************
+Updates the lock table when a page is merged to the right. */
+
+void
+lock_update_merge_right(
+/*====================*/
+ rec_t* orig_succ, /* in: original successor of infimum
+ on the right page before merge */
+ page_t* left_page); /* in: merged index page which will be
+ discarded */
+/*****************************************************************
+Updates the lock table when the root page is copied to another in
+btr_root_raise_and_insert. Note that we leave lock structs on the
+root page, even though they do not make sense on other than leaf
+pages: the reason is that in a pessimistic update the infimum record
+of the root page will act as a dummy carrier of the locks of the record
+to be updated. */
+
+void
+lock_update_root_raise(
+/*===================*/
+ page_t* new_page, /* in: index page to which copied */
+ page_t* root); /* in: root page */
+/*****************************************************************
+Updates the lock table when a page is copied to another and the original page
+is removed from the chain of leaf pages, except if page is the root! */
+
+void
+lock_update_copy_and_discard(
+/*=========================*/
+ page_t* new_page, /* in: index page to which copied */
+ page_t* page); /* in: index page; NOT the root! */
+/*****************************************************************
+Updates the lock table when a page is split to the left. */
+
+void
+lock_update_split_left(
+/*===================*/
+ page_t* right_page, /* in: right page */
+ page_t* left_page); /* in: left page */
+/*****************************************************************
+Updates the lock table when a page is merged to the left. */
+
+void
+lock_update_merge_left(
+/*===================*/
+ page_t* left_page, /* in: left page to which merged */
+ rec_t* orig_pred, /* in: original predecessor of supremum
+ on the left page before merge */
+ page_t* right_page); /* in: merged index page which will be
+ discarded */
+/*****************************************************************
+Resets the original locks on heir and replaces them with gap type locks
+inherited from rec. */
+
+void
+lock_rec_reset_and_inherit_gap_locks(
+/*=================================*/
+ rec_t* heir, /* in: heir record */
+ rec_t* rec); /* in: record */
+/*****************************************************************
+Updates the lock table when a page is discarded. */
+
+void
+lock_update_discard(
+/*================*/
+ rec_t* heir, /* in: record which will inherit the locks */
+ page_t* page); /* in: index page which will be discarded */
+/*****************************************************************
+Updates the lock table when a new user record is inserted. */
+
+void
+lock_update_insert(
+/*===============*/
+ rec_t* rec); /* in: the inserted record */
+/*****************************************************************
+Updates the lock table when a record is removed. */
+
+void
+lock_update_delete(
+/*===============*/
+ rec_t* rec); /* in: the record to be removed */
+/*************************************************************************
+Stores on the page infimum record the explicit locks of another record.
+This function is used to store the lock state of a record when it is
+updated and the size of the record changes in the update. In such an
+update the record is moved, perhaps to another page. The infimum record
+acts as a dummy carrier record, taking care of lock releases while the
+actual record is being moved. */
+
+void
+lock_rec_store_on_page_infimum(
+/*===========================*/
+ rec_t* rec); /* in: record whose lock state is stored
+ on the infimum record of the same page; lock
+ bits are reset on the record */
+/*************************************************************************
+Restores the state of explicit lock requests on a single record, where the
+state was stored on the infimum of the page. */
+
+void
+lock_rec_restore_from_page_infimum(
+/*===============================*/
+ rec_t* rec, /* in: record whose lock state is restored */
+ page_t* page); /* in: page (rec is not necessarily on this page)
+ whose infimum stored the lock state; lock bits are
+ reset on the infimum */
+/*************************************************************************
+Returns TRUE if there are explicit record locks on a page. */
+
+ibool
+lock_rec_expl_exist_on_page(
+/*========================*/
+ /* out: TRUE if there are explicit record locks on
+ the page */
+ ulint space, /* in: space id */
+ ulint page_no);/* in: page number */
+/*************************************************************************
+Checks if locks of other transactions prevent an immediate insert of
+a record. If they do, first tests if the query thread should anyway
+be suspended for some reason; if not, then puts the transaction and
+the query thread to the lock wait state and inserts a waiting request
+for a gap x-lock to the lock queue. */
+
+ulint
+lock_rec_insert_check_and_lock(
+/*===========================*/
+ /* out: DB_SUCCESS, DB_LOCK_WAIT,
+ DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+ ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
+ does nothing */
+ rec_t* rec, /* in: record after which to insert */
+ dict_index_t* index, /* in: index */
+ que_thr_t* thr, /* in: query thread */
+ ibool* inherit);/* out: set to TRUE if the new inserted
+ record maybe should inherit LOCK_GAP type
+ locks from the successor record */
+/*************************************************************************
+Checks if locks of other transactions prevent an immediate modify (update,
+delete mark, or delete unmark) of a clustered index record. If they do,
+first tests if the query thread should anyway be suspended for some
+reason; if not, then puts the transaction and the query thread to the
+lock wait state and inserts a waiting request for a record x-lock to the
+lock queue. */
+
+ulint
+lock_clust_rec_modify_check_and_lock(
+/*=================================*/
+ /* out: DB_SUCCESS, DB_LOCK_WAIT,
+ DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+ ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
+ does nothing */
+ rec_t* rec, /* in: record which should be modified */
+ dict_index_t* index, /* in: clustered index */
+ que_thr_t* thr); /* in: query thread */
+/*************************************************************************
+Checks if locks of other transactions prevent an immediate modify
+(delete mark or delete unmark) of a secondary index record. */
+
+ulint
+lock_sec_rec_modify_check_and_lock(
+/*===============================*/
+ /* out: DB_SUCCESS, DB_LOCK_WAIT,
+ DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+ ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
+ does nothing */
+ rec_t* rec, /* in: record which should be modified;
+ NOTE: as this is a secondary index, we
+ always have to modify the clustered index
+ record first: see the comment below */
+ dict_index_t* index, /* in: secondary index */
+ que_thr_t* thr); /* in: query thread */
+/*************************************************************************
+Checks if locks of other transactions prevent an immediate read, or passing
+over by a read cursor, of a clustered index record. If they do, first tests
+if the query thread should anyway be suspended for some reason; if not, then
+puts the transaction and the query thread to the lock wait state and inserts a
+waiting request for a record lock to the lock queue. Sets the requested mode
+lock on the record. */
+
+ulint
+lock_clust_rec_read_check_and_lock(
+/*===============================*/
+ /* out: DB_SUCCESS, DB_LOCK_WAIT,
+ DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+ ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
+ does nothing */
+ rec_t* rec, /* in: user record or page supremum record
+ which should be read or passed over by a read
+ cursor */
+ dict_index_t* index, /* in: clustered index */
+ ulint mode, /* in: mode of the lock which the read cursor
+ should set on records: LOCK_S or LOCK_X; the
+ latter is possible in SELECT FOR UPDATE */
+ que_thr_t* thr); /* in: query thread */
+/*************************************************************************
+Like the counterpart for a clustered index above, but now we read a
+secondary index record. */
+
+ulint
+lock_sec_rec_read_check_and_lock(
+/*=============================*/
+ /* out: DB_SUCCESS, DB_LOCK_WAIT,
+ DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+ ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
+ does nothing */
+ rec_t* rec, /* in: user record or page supremum record
+ which should be read or passed over by a read
+ cursor */
+ dict_index_t* index, /* in: secondary index */
+ ulint mode, /* in: mode of the lock which the read cursor
+ should set on records: LOCK_S or LOCK_X; the
+ latter is possible in SELECT FOR UPDATE */
+ que_thr_t* thr); /* in: query thread */
+/*************************************************************************
+Checks that a record is seen in a consistent read. */
+
+ibool
+lock_clust_rec_cons_read_sees(
+/*==========================*/
+ /* out: TRUE if sees, or FALSE if an earlier
+ version of the record should be retrieved */
+ rec_t* rec, /* in: user record which should be read or
+ passed over by a read cursor */
+ dict_index_t* index, /* in: clustered index */
+ read_view_t* view); /* in: consistent read view */
+/*************************************************************************
+Checks that a non-clustered index record is seen in a consistent read. */
+
+ulint
+lock_sec_rec_cons_read_sees(
+/*========================*/
+ /* out: TRUE if certainly sees, or FALSE if an
+ earlier version of the clustered index record
+ might be needed: NOTE that a non-clustered
+ index page contains so little information on
+					its modifications that even in the case FALSE,
+					the present version of rec may be the right one,
+ but we must check this from the clustered
+ index record */
+ rec_t* rec, /* in: user record which should be read or
+ passed over by a read cursor */
+ dict_index_t* index, /* in: non-clustered index */
+ read_view_t* view); /* in: consistent read view */
+/*************************************************************************
+Locks the specified database table in the mode given. If the lock cannot
+be granted immediately, the query thread is put to wait. */
+
+ulint
+lock_table(
+/*=======*/
+ /* out: DB_SUCCESS, DB_LOCK_WAIT,
+ DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+ ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
+ does nothing */
+ dict_table_t* table, /* in: database table in dictionary cache */
+ ulint mode, /* in: lock mode */
+ que_thr_t* thr); /* in: query thread */
+/*************************************************************************
+Checks if there are any locks set on the table. */
+
+ibool
+lock_is_on_table(
+/*=============*/
+ /* out: TRUE if there are lock(s) */
+ dict_table_t* table); /* in: database table in dictionary cache */
+/*************************************************************************
+Releases transaction locks, and releases possible other transactions waiting
+because of these locks. */
+
+void
+lock_release_off_kernel(
+/*====================*/
+ trx_t* trx); /* in: transaction */
+/*************************************************************************
+Calculates the fold value of a page file address: used in inserting or
+searching for a lock in the hash table. */
+UNIV_INLINE
+ulint
+lock_rec_fold(
+/*===========*/
+ /* out: folded value */
+ ulint space, /* in: space */
+ ulint page_no);/* in: page number */
+/*************************************************************************
+Calculates the hash value of a page file address: used in inserting or
+searching for a lock in the hash table. */
+UNIV_INLINE
+ulint
+lock_rec_hash(
+/*==========*/
+ /* out: hashed value */
+ ulint space, /* in: space */
+ ulint page_no);/* in: page number */
+/*************************************************************************
+Gets the mutex protecting record locks on a given page address. */
+
+mutex_t*
+lock_rec_get_mutex_for_addr(
+/*========================*/
+ ulint space, /* in: space id */
+ ulint page_no);/* in: page number */
+/*************************************************************************
+Validates the lock queue on a single record. */
+
+ibool
+lock_rec_queue_validate(
+/*====================*/
+ /* out: TRUE if ok */
+ rec_t* rec, /* in: record to look at */
+ dict_index_t* index); /* in: index, or NULL if not known */
+/*************************************************************************
+Prints info of a table lock. */
+
+void
+lock_table_print(
+/*=============*/
+ lock_t* lock); /* in: table type lock */
+/*************************************************************************
+Prints info of a record lock. */
+
+void
+lock_rec_print(
+/*===========*/
+ lock_t* lock); /* in: record type lock */
+/*************************************************************************
+Prints info of locks for all transactions. */
+
+void
+lock_print_info(void);
+/*=================*/
+/*************************************************************************
+Validates the lock queue on a table. */
+
+ibool
+lock_table_queue_validate(
+/*======================*/
+ /* out: TRUE if ok */
+ dict_table_t* table); /* in: table */
+/*************************************************************************
+Validates the record lock queues on a page. */
+
+ibool
+lock_rec_validate_page(
+/*===================*/
+ /* out: TRUE if ok */
+ ulint space, /* in: space id */
+ ulint page_no);/* in: page number */
+/*************************************************************************
+Validates the lock system. */
+
+ibool
+lock_validate(void);
+/*===============*/
+ /* out: TRUE if ok */
+
+/* The lock system */
+extern lock_sys_t* lock_sys;
+
+/* Lock modes and types */
+#define LOCK_NONE 0 /* this flag is used elsewhere to note
+ consistent read */
+#define LOCK_IS 2 /* intention shared */
+#define LOCK_IX 3 /* intention exclusive */
+#define LOCK_S 4 /* shared */
+#define LOCK_X 5 /* exclusive */
+#define LOCK_MODE_MASK 0xF /* mask used to extract mode from the
+ type_mode field in a lock */
+#define LOCK_TABLE 16 /* these type values should be so high that */
+#define LOCK_REC 32 /* they can be ORed to the lock mode */
+#define LOCK_TYPE_MASK 0xF0 /* mask used to extract lock type from the
+ type_mode field in a lock */
+#define LOCK_WAIT 256 /* this wait bit should be so high that
+ it can be ORed to the lock mode and type;
+ when this bit is set, it means that the
+ lock has not yet been granted, it is just
+ waiting for its turn in the wait queue */
+#define LOCK_GAP 512 /* this gap bit should be so high that
+ it can be ORed to the other flags;
+ when this bit is set, it means that the
+ lock holds only on the gap before the record;
+ for instance, an x-lock on the gap does not
+ give permission to modify the record on which
+ the bit is set; locks of this type are created
+ when records are removed from the index chain
+ of records */
+
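
The mode occupies the low four bits of a lock's flag word, the table/record type the next four, and the wait and gap flags still higher bits, so one ulint can describe a whole request (the comments above refer to this as the type_mode field of a lock; that field name is taken on trust here, the struct itself is not in this header). A small sketch of composing and decoding such a word, with the constants copied from the definitions above:

#include <stdio.h>

/* values copied from the definitions above */
#define LOCK_X		5
#define LOCK_MODE_MASK	0xF
#define LOCK_REC	32
#define LOCK_TYPE_MASK	0xF0
#define LOCK_WAIT	256
#define LOCK_GAP	512

int main(void)
{
	/* a waiting gap x-lock request on a record */
	unsigned long type_mode = LOCK_REC | LOCK_X | LOCK_WAIT | LOCK_GAP;

	printf("mode    = %lu\n", type_mode & LOCK_MODE_MASK); /* 5  = LOCK_X   */
	printf("type    = %lu\n", type_mode & LOCK_TYPE_MASK); /* 32 = LOCK_REC */
	printf("waiting = %s\n", (type_mode & LOCK_WAIT) ? "yes" : "no");
	printf("gap     = %s\n", (type_mode & LOCK_GAP) ? "yes" : "no");

	return(0);
}
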
+/* When lock bits are reset, the following flags are available: */
+#define LOCK_RELEASE_WAIT 1
+#define LOCK_NOT_RELEASE_WAIT 2
+
+/* Lock operation struct */
+typedef struct lock_op_struct lock_op_t;
+struct lock_op_struct{
+ dict_table_t* table; /* table to be locked */
+ ulint mode; /* lock mode */
+};
+
+#define LOCK_OP_START 1
+#define LOCK_OP_COMPLETE 2
+
+/* The lock system struct */
+struct lock_sys_struct{
+ hash_table_t* rec_hash; /* hash table of the record locks */
+};
+
+/* The lock system */
+extern lock_sys_t* lock_sys;
+
+
+#ifndef UNIV_NONINL
+#include "lock0lock.ic"
+#endif
+
+#endif
diff --git a/innobase/include/lock0lock.ic b/innobase/include/lock0lock.ic
new file mode 100644
index 00000000000..64c43c88d2e
--- /dev/null
+++ b/innobase/include/lock0lock.ic
@@ -0,0 +1,80 @@
+/******************************************************
+The transaction lock system
+
+(c) 1996 Innobase Oy
+
+Created 5/7/1996 Heikki Tuuri
+*******************************************************/
+
+#include "sync0sync.h"
+#include "srv0srv.h"
+#include "dict0dict.h"
+#include "row0row.h"
+#include "trx0sys.h"
+#include "trx0trx.h"
+#include "buf0buf.h"
+#include "page0page.h"
+#include "page0cur.h"
+#include "row0vers.h"
+#include "que0que.h"
+#include "btr0cur.h"
+#include "read0read.h"
+#include "log0recv.h"
+
+/*************************************************************************
+Calculates the fold value of a page file address: used in inserting or
+searching for a lock in the hash table. */
+UNIV_INLINE
+ulint
+lock_rec_fold(
+/*==========*/
+ /* out: folded value */
+ ulint space, /* in: space */
+ ulint page_no)/* in: page number */
+{
+ return(ut_fold_ulint_pair(space, page_no));
+}
+
+/*************************************************************************
+Calculates the hash value of a page file address: used in inserting or
+searching for a lock in the hash table. */
+UNIV_INLINE
+ulint
+lock_rec_hash(
+/*==========*/
+ /* out: hashed value */
+ ulint space, /* in: space */
+ ulint page_no)/* in: page number */
+{
+ return(hash_calc_hash(lock_rec_fold(space, page_no),
+ lock_sys->rec_hash));
+}
+
+/*************************************************************************
+Checks if some transaction has an implicit x-lock on a record in a clustered
+index. */
+UNIV_INLINE
+trx_t*
+lock_clust_rec_some_has_impl(
+/*=========================*/
+ /* out: transaction which has the x-lock, or
+ NULL */
+ rec_t* rec, /* in: user record */
+ dict_index_t* index) /* in: clustered index */
+{
+ dulint trx_id;
+
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(page_rec_is_user_rec(rec));
+
+ trx_id = row_get_rec_trx_id(rec, index);
+
+ if (trx_is_active(trx_id)) {
+ /* The modifying or inserting transaction is active */
+
+ return(trx_get_on_id(trx_id));
+ }
+
+ return(NULL);
+}
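
lock_rec_fold and lock_rec_hash above locate all record locks of one page through a single hash cell keyed on (space, page_no). A toy model of that addressing scheme, in which the fold function is only a stand-in for ut_fold_ulint_pair and the modulo reduction is an assumption about what hash_calc_hash does:

#include <stdio.h>

static unsigned long toy_fold_pair(unsigned long a, unsigned long b)
{
	/* stand-in for ut_fold_ulint_pair: any mixing of the two words
	will do for the illustration */
	return(a * 1103515245UL + b);
}

int main(void)
{
	unsigned long space   = 0;    /* system tablespace */
	unsigned long page_no = 427;  /* some index page */
	unsigned long n_cells = 1024; /* assumed size of lock_sys->rec_hash */

	unsigned long fold = toy_fold_pair(space, page_no);
	unsigned long cell = fold % n_cells;

	/* every lock on page (0, 427) lands in the same cell, so operations
	on the locks of one page only ever touch one hash chain */
	printf("fold = %lu, cell = %lu\n", fold, cell);

	return(0);
}
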
diff --git a/innobase/include/lock0types.h b/innobase/include/lock0types.h
new file mode 100644
index 00000000000..705e64f6581
--- /dev/null
+++ b/innobase/include/lock0types.h
@@ -0,0 +1,15 @@
+/******************************************************
+The transaction lock system global types
+
+(c) 1996 Innobase Oy
+
+Created 5/7/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef lock0types_h
+#define lock0types_h
+
+typedef struct lock_struct lock_t;
+typedef struct lock_sys_struct lock_sys_t;
+
+#endif
diff --git a/innobase/include/log0log.h b/innobase/include/log0log.h
new file mode 100644
index 00000000000..001f98cfc3c
--- /dev/null
+++ b/innobase/include/log0log.h
@@ -0,0 +1,752 @@
+/******************************************************
+Database log
+
+(c) 1995 Innobase Oy
+
+Created 12/9/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef log0log_h
+#define log0log_h
+
+#include "univ.i"
+#include "ut0byte.h"
+#include "sync0sync.h"
+#include "sync0rw.h"
+
+typedef struct log_struct log_t;
+typedef struct log_group_struct log_group_t;
+
+extern ibool log_do_write;
+extern ibool log_debug_writes;
+
+/* Wait modes for log_flush_up_to */
+#define LOG_NO_WAIT 91
+#define LOG_WAIT_ONE_GROUP 92
+#define LOG_WAIT_ALL_GROUPS 93
+#define LOG_MAX_N_GROUPS 32
+
+/****************************************************************
+Writes to the log the string given. The log must be released with
+log_release. */
+UNIV_INLINE
+dulint
+log_reserve_and_write_fast(
+/*=======================*/
+ /* out: end lsn of the log record, ut_dulint_zero if
+ did not succeed */
+ byte* str, /* in: string */
+ ulint len, /* in: string length */
+ dulint* start_lsn,/* out: start lsn of the log record */
+ ibool* success);/* out: TRUE if success */
+/***************************************************************************
+Releases the log mutex. */
+UNIV_INLINE
+void
+log_release(void);
+/*=============*/
+/***************************************************************************
+Checks if there is need for a log buffer flush or a new checkpoint, and does
+this if yes. Any database operation should call this when it has modified
+more than about 4 pages. NOTE that this function may only be called when the
+OS thread owns no synchronization objects except the dictionary mutex. */
+UNIV_INLINE
+void
+log_free_check(void);
+/*================*/
+/****************************************************************
+Opens the log for log_write_low. The log must be closed with log_close and
+released with log_release. */
+
+dulint
+log_reserve_and_open(
+/*=================*/
+ /* out: start lsn of the log record */
+ ulint len); /* in: length of data to be catenated */
+/****************************************************************
+Writes to the log the string given. It is assumed that the caller holds the
+log mutex. */
+
+void
+log_write_low(
+/*==========*/
+ byte* str, /* in: string */
+ ulint str_len); /* in: string length */
+/****************************************************************
+Closes the log. */
+
+dulint
+log_close(void);
+/*===========*/
+ /* out: lsn */
+/****************************************************************
+Gets the current lsn. */
+UNIV_INLINE
+dulint
+log_get_lsn(void);
+/*=============*/
+ /* out: current lsn */
+/****************************************************************************
+Gets the online backup lsn. */
+UNIV_INLINE
+dulint
+log_get_online_backup_lsn_low(void);
+/*===============================*/
+/****************************************************************************
+Gets the online backup state. */
+UNIV_INLINE
+ibool
+log_get_online_backup_state_low(void);
+/*=================================*/
+ /* out: online backup state, the caller must
+ own the log_sys mutex */
+/**********************************************************
+Initializes the log. */
+
+void
+log_init(void);
+/*==========*/
+/**********************************************************************
+Initializes a log group in the log system. */
+
+void
+log_group_init(
+/*===========*/
+ ulint id, /* in: group id */
+ ulint n_files, /* in: number of log files */
+ ulint file_size, /* in: log file size in bytes */
+ ulint space_id, /* in: space id of the file space
+ which contains the log files of this
+ group */
+ ulint archive_space_id); /* in: space id of the file space
+ which contains some archived log
+ files for this group; currently, only
+ for the first log group this is
+ used */
+/**********************************************************
+Completes an i/o to a log file. */
+
+void
+log_io_complete(
+/*============*/
+ log_group_t* group); /* in: log group */
+/**********************************************************
+This function is called, e.g., when a transaction wants to commit. It checks
+that the log has been flushed to disk up to the last log entry written by the
+transaction. If there is a flush running, it waits and checks if the flush
+flushed enough. If not, starts a new flush. */
+
+void
+log_flush_up_to(
+/*============*/
+ dulint lsn, /* in: log sequence number up to which the log should
+ be flushed, ut_dulint_max if not specified */
+ ulint wait); /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
+ or LOG_WAIT_ALL_GROUPS */
+/********************************************************************
+Advances the smallest lsn for which there are unflushed dirty blocks in the
+buffer pool and also may make a new checkpoint. NOTE: this function may only
+be called if the calling thread owns no synchronization objects! */
+
+ibool
+log_preflush_pool_modified_pages(
+/*=============================*/
+ /* out: FALSE if there was a flush batch of
+ the same type running, which means that we
+ could not start this flush batch */
+ dulint new_oldest, /* in: try to advance oldest_modified_lsn
+ at least to this lsn */
+ ibool sync); /* in: TRUE if synchronous operation is
+ desired */
+/**********************************************************
+Makes a checkpoint. Note that this function does not flush dirty
+blocks from the buffer pool: it only checks the lsn of the oldest
+modification in the pool, and writes information about that lsn to the
+log files. Use log_make_checkpoint_at to also flush the pool. */
+
+ibool
+log_checkpoint(
+/*===========*/
+ /* out: TRUE if success, FALSE if a checkpoint
+ write was already running */
+ ibool sync, /* in: TRUE if synchronous operation is
+ desired */
+ ibool write_always); /* in: the function normally checks if the
+				new checkpoint would have a greater
+ lsn than the previous one: if not, then no
+ physical write is done; by setting this
+ parameter TRUE, a physical write will always be
+ made to log files */
+/********************************************************************
+Makes a checkpoint at a given lsn or later. */
+
+void
+log_make_checkpoint_at(
+/*===================*/
+ dulint lsn, /* in: make a checkpoint at this or a later
+ lsn, if ut_dulint_max, makes a checkpoint at
+ the latest lsn */
+ ibool write_always); /* in: the function normally checks if the
+				new checkpoint would have a greater
+ lsn than the previous one: if not, then no
+ physical write is done; by setting this
+ parameter TRUE, a physical write will always be
+ made to log files */
+/********************************************************************
+Makes a checkpoint at the latest lsn and writes it to the first page of each
+data file in the database, so that we know that the file spaces contain
+all modifications up to that lsn. This can only be called at database
+shutdown. This function also writes all log in log files to the log archive. */
+
+void
+logs_empty_and_mark_files_at_shutdown(void);
+/*=======================================*/
+/**********************************************************
+Reads a checkpoint info from a log group header to log_sys->checkpoint_buf. */
+
+void
+log_group_read_checkpoint_info(
+/*===========================*/
+ log_group_t* group, /* in: log group */
+ ulint field); /* in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
+/***********************************************************************
+Gets info from a checkpoint about a log group. */
+
+void
+log_checkpoint_get_nth_group_info(
+/*==============================*/
+ byte* buf, /* in: buffer containing checkpoint info */
+ ulint n, /* in: nth slot */
+ ulint* file_no,/* out: archived file number */
+ ulint* offset);/* out: archived file offset */
+/**********************************************************
+Writes checkpoint info to groups. */
+
+void
+log_groups_write_checkpoint_info(void);
+/*==================================*/
+/************************************************************************
+Starts an archiving operation. */
+
+ibool
+log_archive_do(
+/*===========*/
+ /* out: TRUE if succeed, FALSE if an archiving
+ operation was already running */
+ ibool sync, /* in: TRUE if synchronous operation is desired */
+ ulint* n_bytes);/* out: archive log buffer size, 0 if nothing to
+ archive */
+/********************************************************************
+Writes the log contents to the archive up to the lsn when this function was
+called, and stops the archiving. When archiving is started again, the archived
+log file numbers start from a number one higher, so that the archiving will
+not write again to the archived log files which exist when this function
+returns. */
+
+ulint
+log_archive_stop(void);
+/*==================*/
+ /* out: DB_SUCCESS or DB_ERROR */
+/********************************************************************
+Starts again archiving which has been stopped. */
+
+ulint
+log_archive_start(void);
+/*===================*/
+ /* out: DB_SUCCESS or DB_ERROR */
+/********************************************************************
+Stop archiving the log so that a gap may occur in the archived log files. */
+
+ulint
+log_archive_noarchivelog(void);
+/*==========================*/
+ /* out: DB_SUCCESS or DB_ERROR */
+/********************************************************************
+Starts archiving the log again after it has been stopped. */
+
+ulint
+log_archive_archivelog(void);
+/*========================*/
+ /* out: DB_SUCCESS or DB_ERROR */
+/**********************************************************
+Generates an archived log file name. */
+
+void
+log_archived_file_name_gen(
+/*=======================*/
+ char* buf, /* in: buffer where to write */
+ ulint id, /* in: group id */
+ ulint file_no);/* in: file number */
+/**********************************************************
+Switches the database to the online backup state. */
+
+ulint
+log_switch_backup_state_on(void);
+/*============================*/
+ /* out: DB_SUCCESS or DB_ERROR */
+/**********************************************************
+Switches the online backup state off. */
+
+ulint
+log_switch_backup_state_off(void);
+/*=============================*/
+ /* out: DB_SUCCESS or DB_ERROR */
+/************************************************************************
+Checks that there is enough free space in the log to start a new query step.
+Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
+function may only be called if the calling thread owns no synchronization
+objects! */
+
+void
+log_check_margins(void);
+/*===================*/
+/**********************************************************
+Reads a specified log segment to a buffer. */
+
+void
+log_group_read_log_seg(
+/*===================*/
+ ulint type, /* in: LOG_ARCHIVE or LOG_RECOVER */
+ byte* buf, /* in: buffer where to read */
+ log_group_t* group, /* in: log group */
+ dulint start_lsn, /* in: read area start */
+ dulint end_lsn); /* in: read area end */
+/**********************************************************
+Writes a buffer to a log file group. */
+
+void
+log_group_write_buf(
+/*================*/
+ ulint type, /* in: LOG_FLUSH or LOG_RECOVER */
+ log_group_t* group, /* in: log group */
+ byte* buf, /* in: buffer */
+ ulint len, /* in: buffer len; must be divisible
+ by OS_FILE_LOG_BLOCK_SIZE */
+ dulint start_lsn, /* in: start lsn of the buffer; must
+ be divisible by
+ OS_FILE_LOG_BLOCK_SIZE */
+ ulint new_data_offset);/* in: start offset of new data in
+ buf: this parameter is used to decide
+ if we have to write a new log file
+ header */
+/************************************************************
+Sets the field values in group to correspond to a given lsn. For this function
+to work, the values must already be correctly initialized to correspond to
+some lsn, for instance, a checkpoint lsn. */
+
+void
+log_group_set_fields(
+/*=================*/
+ log_group_t* group, /* in: group */
+ dulint lsn); /* in: lsn for which the values should be
+ set */
+/**********************************************************
+Calculates the data capacity of a log group, when the log file headers are not
+included. */
+
+ulint
+log_group_get_capacity(
+/*===================*/
+ /* out: capacity in bytes */
+ log_group_t* group); /* in: log group */
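
If, as the description suggests, the capacity is simply the total size of the group's files minus their headers, the arithmetic looks roughly like the sketch below; OS_FILE_LOG_BLOCK_SIZE is assumed to be 512 bytes here, and the real computation lives in log0log.c, not in this header.

#include <stdio.h>

#define OS_FILE_LOG_BLOCK_SIZE	512UL	/* assumed block size */
#define LOG_FILE_HDR_SIZE	(4 * OS_FILE_LOG_BLOCK_SIZE)

int main(void)
{
	unsigned long n_files   = 2;			/* two log files in the group */
	unsigned long file_size = 5UL * 1024 * 1024;	/* 5 MB each */

	unsigned long capacity = n_files * (file_size - LOG_FILE_HDR_SIZE);

	printf("usable log capacity: %lu bytes\n", capacity);

	return(0);
}
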
+/****************************************************************
+Gets a log block flush bit. */
+UNIV_INLINE
+ibool
+log_block_get_flush_bit(
+/*====================*/
+ /* out: TRUE if this block was the first
+ to be written in a log flush */
+ byte* log_block); /* in: log block */
+/****************************************************************
+Gets a log block number stored in the header. */
+UNIV_INLINE
+ulint
+log_block_get_hdr_no(
+/*=================*/
+ /* out: log block number stored in the block
+ header */
+ byte* log_block); /* in: log block */
+/****************************************************************
+Gets a log block data length. */
+UNIV_INLINE
+ulint
+log_block_get_data_len(
+/*===================*/
+ /* out: log block data length measured as a
+ byte offset from the block start */
+ byte* log_block); /* in: log block */
+/****************************************************************
+Sets the log block data length. */
+UNIV_INLINE
+void
+log_block_set_data_len(
+/*===================*/
+ byte* log_block, /* in: log block */
+ ulint len); /* in: data length */
+/****************************************************************
+Gets a log block number stored in the trailer. */
+UNIV_INLINE
+ulint
+log_block_get_trl_no(
+/*=================*/
+ /* out: log block number stored in the block
+ trailer */
+ byte* log_block); /* in: log block */
+/****************************************************************
+Gets a log block first mtr log record group offset. */
+UNIV_INLINE
+ulint
+log_block_get_first_rec_group(
+/*==========================*/
+ /* out: first mtr log record group byte offset
+ from the block start, 0 if none */
+ byte* log_block); /* in: log block */
+/****************************************************************
+Sets the log block first mtr log record group offset. */
+UNIV_INLINE
+void
+log_block_set_first_rec_group(
+/*==========================*/
+ byte* log_block, /* in: log block */
+ ulint offset); /* in: offset, 0 if none */
+/****************************************************************
+Gets a log block checkpoint number field (4 lowest bytes). */
+UNIV_INLINE
+ulint
+log_block_get_checkpoint_no(
+/*========================*/
+ /* out: checkpoint no (4 lowest bytes) */
+ byte* log_block); /* in: log block */
+/****************************************************************
+Initializes a log block in the log buffer. */
+UNIV_INLINE
+void
+log_block_init(
+/*===========*/
+ byte* log_block, /* in: pointer to the log buffer */
+ dulint lsn); /* in: lsn within the log block */
+/****************************************************************
+Converts a lsn to a log block number. */
+UNIV_INLINE
+ulint
+log_block_convert_lsn_to_no(
+/*========================*/
+ /* out: log block number, it is > 0 and <= 1G */
+ dulint lsn); /* in: lsn of a byte within the block */
+/**********************************************************
+Prints info of the log. */
+
+void
+log_print(void);
+/*===========*/
+
+extern log_t* log_sys;
+
+/* Values used as flags */
+#define LOG_FLUSH 7652559
+#define LOG_CHECKPOINT 78656949
+#define LOG_ARCHIVE 11122331
+#define LOG_RECOVER 98887331
+
+/* The counting of lsn's starts from this value: this must be non-zero */
+#define LOG_START_LSN ut_dulint_create(0, 16 * OS_FILE_LOG_BLOCK_SIZE)
+
+#define LOG_BUFFER_SIZE (srv_log_buffer_size * UNIV_PAGE_SIZE)
+#define LOG_ARCHIVE_BUF_SIZE (srv_log_buffer_size * UNIV_PAGE_SIZE / 4)
+
+/* Offsets of a log block header */
+#define LOG_BLOCK_HDR_NO 0 /* block number which must be > 0 and
+ is allowed to wrap around at 2G; the
+ highest bit is set to 1 if this is the
+ first log block in a log flush write
+ segment */
+#define LOG_BLOCK_FLUSH_BIT_MASK 0x80000000
+ /* mask used to get the highest bit in
+ the preceding field */
+#define LOG_BLOCK_HDR_DATA_LEN 4 /* number of bytes of log written to
+ this block */
+#define LOG_BLOCK_FIRST_REC_GROUP 6 /* offset of the first start of an
+ mtr log record group in this log block,
+ 0 if none; if the value is the same
+ as LOG_BLOCK_HDR_DATA_LEN, it means
+ that the first rec group has not yet
+ been catenated to this log block, but
+					when it is, it will start at this
+ offset; an archive recovery can
+ start parsing the log records starting
+ from this offset in this log block,
+ if value not 0 */
+#define LOG_BLOCK_CHECKPOINT_NO 8 /* 4 lower bytes of the value of
+ log_sys->next_checkpoint_no when the
+ log block was last written to: if the
+ block has not yet been written full,
+ this value is only updated before a
+ log buffer flush */
+#define LOG_BLOCK_HDR_SIZE 12 /* size of the log block header in
+ bytes */
+
+/* Offsets of a log block trailer from the end of the block */
+#define LOG_BLOCK_TRL_NO 4 /* log block number */
+#define LOG_BLOCK_TRL_SIZE 4 /* trailer size in bytes */
+
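
Taken together, the offsets above imply that each log block carries a 12-byte header and a 4-byte trailer around its payload. A tiny sketch of that arithmetic, with OS_FILE_LOG_BLOCK_SIZE assumed to be 512 bytes:

#include <stdio.h>

#define OS_FILE_LOG_BLOCK_SIZE	512	/* assumed */
#define LOG_BLOCK_HDR_SIZE	12	/* from the header offsets above */
#define LOG_BLOCK_TRL_SIZE	4	/* from the trailer offsets above */

int main(void)
{
	int payload = OS_FILE_LOG_BLOCK_SIZE
			- LOG_BLOCK_HDR_SIZE - LOG_BLOCK_TRL_SIZE;

	/* prints 496: the number of log bytes that fit into one block */
	printf("payload per log block: %d bytes\n", payload);

	return(0);
}
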
+/* Offsets for a checkpoint field */
+#define LOG_CHECKPOINT_NO 0
+#define LOG_CHECKPOINT_LSN 8
+#define LOG_CHECKPOINT_OFFSET 16
+#define LOG_CHECKPOINT_LOG_BUF_SIZE 20
+#define LOG_CHECKPOINT_ARCHIVED_LSN 24
+#define LOG_CHECKPOINT_GROUP_ARRAY 32
+
+/* For each value < LOG_MAX_N_GROUPS the following 8 bytes: */
+
+#define LOG_CHECKPOINT_ARCHIVED_FILE_NO 0
+#define LOG_CHECKPOINT_ARCHIVED_OFFSET 4
+
+#define LOG_CHECKPOINT_ARRAY_END (LOG_CHECKPOINT_GROUP_ARRAY\
+ + LOG_MAX_N_GROUPS * 8)
+#define LOG_CHECKPOINT_CHECKSUM_1 LOG_CHECKPOINT_ARRAY_END
+#define LOG_CHECKPOINT_CHECKSUM_2 (4 + LOG_CHECKPOINT_ARRAY_END)
+#define LOG_CHECKPOINT_SIZE (8 + LOG_CHECKPOINT_ARRAY_END)
+
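
Since LOG_MAX_N_GROUPS is defined as 32 earlier in this header, the group array holds 32 eight-byte slots starting at byte 32, which fixes the remaining checkpoint offsets. Spelling the arithmetic out:

#include <stdio.h>

#define LOG_MAX_N_GROUPS		32
#define LOG_CHECKPOINT_GROUP_ARRAY	32
#define LOG_CHECKPOINT_ARRAY_END	(LOG_CHECKPOINT_GROUP_ARRAY \
					+ LOG_MAX_N_GROUPS * 8)

int main(void)
{
	/* prints 288, 288, 292 and 296 */
	printf("array end  = %d\n", LOG_CHECKPOINT_ARRAY_END);
	printf("checksum 1 = %d\n", LOG_CHECKPOINT_ARRAY_END);
	printf("checksum 2 = %d\n", 4 + LOG_CHECKPOINT_ARRAY_END);
	printf("total size = %d\n", 8 + LOG_CHECKPOINT_ARRAY_END);

	return(0);
}
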
+/* Offsets of a log file header */
+#define LOG_GROUP_ID 0 /* log group number */
+#define LOG_FILE_START_LSN 4 /* lsn of the start of data in this
+ log file */
+#define LOG_FILE_NO 12 /* 4-byte archived log file number */
+#define LOG_FILE_ARCH_COMPLETED OS_FILE_LOG_BLOCK_SIZE
+ /* this 4-byte field is TRUE when
+ the writing of an archived log file
+ has been completed */
+#define LOG_FILE_END_LSN (OS_FILE_LOG_BLOCK_SIZE + 4)
+ /* lsn where the archived log file
+ at least extends: actually the
+ archived log file may extend to a
+ later lsn, as long as it is within the
+ same log block as this lsn; this field
+ is defined only when an archived log
+ file has been completely written */
+#define LOG_CHECKPOINT_1 OS_FILE_LOG_BLOCK_SIZE
+#define LOG_CHECKPOINT_2 (3 * OS_FILE_LOG_BLOCK_SIZE)
+#define LOG_FILE_HDR_SIZE (4 * OS_FILE_LOG_BLOCK_SIZE)
+
+#define LOG_GROUP_OK 301
+#define LOG_GROUP_CORRUPTED 302
+
+/* Log group consists of a number of log files, each of the same size; a log
+group is implemented as a space in the sense of the module fil0fil. */
+
+struct log_group_struct{
+ /* The following fields are protected by log_sys->mutex */
+ ulint id; /* log group id */
+ ulint n_files; /* number of files in the group */
+ ulint file_size; /* individual log file size in bytes,
+ including the log file header */
+ ulint space_id; /* file space which implements the log
+ group */
+ ulint state; /* LOG_GROUP_OK or
+ LOG_GROUP_CORRUPTED */
+ dulint lsn; /* lsn used to fix coordinates within
+ the log group */
+ ulint lsn_offset; /* the offset of the above lsn */
+ ulint n_pending_writes;/* number of currently pending flush
+ writes for this log group */
+ byte** file_header_bufs;/* buffers for each file header in the
+ group */
+ /*-----------------------------*/
+ byte** archive_file_header_bufs;/* buffers for each file
+ header in the group */
+ ulint archive_space_id;/* file space which implements the log
+ group archive */
+ ulint archived_file_no;/* file number corresponding to
+ log_sys->archived_lsn */
+ ulint archived_offset;/* file offset corresponding to
+ log_sys->archived_lsn, 0 if we have
+ not yet written to the archive file
+ number archived_file_no */
+ ulint next_archived_file_no;/* during an archive write,
+ until the write is completed, we
+ store the next value for
+ archived_file_no here: the write
+ completion function then sets the new
+ value to ..._file_no */
+ ulint next_archived_offset; /* like the preceding field */
+ /*-----------------------------*/
+ dulint scanned_lsn; /* used only in recovery: recovery scan
+ succeeded up to this lsn in this log
+ group */
+ byte* checkpoint_buf; /* checkpoint header is written from
+ this buffer to the group */
+ UT_LIST_NODE_T(log_group_t)
+ log_groups; /* list of log groups */
+};
+
+struct log_struct{
+ byte pad[64]; /* padding to prevent other memory
+ update hotspots from residing on the
+ same memory cache line */
+ dulint lsn; /* log sequence number */
+ ulint buf_free; /* first free offset within the log
+ buffer */
+ mutex_t mutex; /* mutex protecting the log */
+ byte* buf; /* log buffer */
+ ulint buf_size; /* log buffer size in bytes */
+ ulint max_buf_free; /* recommended maximum value of
+ buf_free, after which the buffer is
+ flushed */
+ ulint old_buf_free; /* value of buf free when log was
+ last time opened; only in the debug
+ version */
+ dulint old_lsn; /* value of lsn when log was last time
+ opened; only in the debug version */
+ ibool check_flush_or_checkpoint;
+ /* this is set to TRUE when there may
+ be need to flush the log buffer, or
+ preflush buffer pool pages, or make
+ a checkpoint; this MUST be TRUE when
+ lsn - last_checkpoint_lsn >
+ max_checkpoint_age; this flag is
+ peeked at by log_free_check(), which
+ does not reserve the log mutex */
+ UT_LIST_BASE_NODE_T(log_group_t)
+ log_groups; /* log groups */
+
+ /* The fields involved in the log buffer flush */
+
+ ulint buf_next_to_write;/* first offset in the log buffer
+					where the byte content may not yet have
+					been written to file, e.g., the start
+ offset of a log record catenated
+ later; this is advanced when a flush
+ operation is completed to all the log
+ groups */
+ dulint written_to_some_lsn;
+ /* first log sequence number not yet
+ written to any log group; for this to
+ be advanced, it is enough that the
+ write i/o has been completed for any
+ one log group */
+ dulint written_to_all_lsn;
+ /* first log sequence number not yet
+ written to some log group; for this to
+ be advanced, it is enough that the
+ write i/o has been completed for all
+ log groups */
+ dulint flush_lsn; /* end lsn for the current flush */
+	ulint		flush_end_offset;/* the data in buffer has been flushed
+ up to this offset when the current
+ flush ends: this field will then
+ be copied to buf_next_to_write */
+ ulint n_pending_writes;/* number of currently pending flush
+ writes */
+ os_event_t no_flush_event; /* this event is in the reset state
+ when a flush is running; a thread
+ should wait for this without owning
+ the log mutex, but NOTE that to set or
+ reset this event, the thread MUST own
+ the log mutex! */
+ ibool one_flushed; /* during a flush, this is first FALSE
+ and becomes TRUE when one log group
+ has been flushed */
+ os_event_t one_flushed_event;/* this event is reset when the
+ flush has not yet completed for any
+ log group; e.g., this means that a
+ transaction has been committed when
+ this is set; a thread should wait
+ for this without owning the log mutex,
+ but NOTE that to set or reset this
+ event, the thread MUST own the log
+ mutex! */
+ ulint n_log_ios; /* number of log i/os initiated thus
+ far */
+ /* Fields involved in checkpoints */
+ ulint max_modified_age_async;
+ /* when this recommended value for lsn
+ - buf_pool_get_oldest_modification()
+ is exceeded, we start an asynchronous
+ preflush of pool pages */
+ ulint max_modified_age_sync;
+ /* when this recommended value for lsn
+ - buf_pool_get_oldest_modification()
+ is exceeded, we start a synchronous
+ preflush of pool pages */
+ ulint adm_checkpoint_interval;
+ /* administrator-specified checkpoint
+ interval in terms of log growth in
+ bytes; the interval actually used by
+ the database can be smaller */
+ ulint max_checkpoint_age_async;
+ /* when this checkpoint age is exceeded
+ we start an asynchronous writing of a
+ new checkpoint */
+ ulint max_checkpoint_age;
+ /* this is the maximum allowed value
+ for lsn - last_checkpoint_lsn when a
+ new query step is started */
+ dulint next_checkpoint_no;
+ /* next checkpoint number */
+ dulint last_checkpoint_lsn;
+ /* latest checkpoint lsn */
+ dulint next_checkpoint_lsn;
+ /* next checkpoint lsn */
+ ulint n_pending_checkpoint_writes;
+ /* number of currently pending
+ checkpoint writes */
+ rw_lock_t checkpoint_lock;/* this latch is x-locked when a
+ checkpoint write is running; a thread
+ should wait for this without owning
+ the log mutex */
+ byte* checkpoint_buf; /* checkpoint header is read to this
+ buffer */
+ /* Fields involved in archiving */
+	ulint		archiving_state;/* LOG_ARCH_ON, LOG_ARCH_STOPPING,
+ LOG_ARCH_STOPPED, LOG_ARCH_OFF */
+ dulint archived_lsn; /* archiving has advanced to this lsn */
+ ulint max_archived_lsn_age_async;
+ /* recommended maximum age of
+ archived_lsn, before we start
+ asynchronous copying to the archive */
+ ulint max_archived_lsn_age;
+ /* maximum allowed age for
+ archived_lsn */
+ dulint next_archived_lsn;/* during an archive write,
+ until the write is completed, we
+ store the next value for
+ archived_lsn here: the write
+ completion function then sets the new
+ value to archived_lsn */
+ ulint archiving_phase;/* LOG_ARCHIVE_READ or
+ LOG_ARCHIVE_WRITE */
+ ulint n_pending_archive_ios;
+ /* number of currently pending reads
+ or writes in archiving */
+ rw_lock_t archive_lock; /* this latch is x-locked when an
+ archive write is running; a thread
+ should wait for this without owning
+ the log mutex */
+ ulint archive_buf_size;/* size of archive_buf */
+ byte* archive_buf; /* log segment is written to the
+ archive from this buffer */
+ os_event_t archiving_on; /* if archiving has been stopped,
+ a thread can wait for this event to
+ become signaled */
+ /* Fields involved in online backups */
+ ibool online_backup_state;
+ /* TRUE if the database is in the
+ online backup state */
+ dulint online_backup_lsn;
+ /* lsn when the state was changed to
+ the online backup state */
+};
+
+#define LOG_ARCH_ON 71
+#define LOG_ARCH_STOPPING 72
+#define LOG_ARCH_STOPPING2 73
+#define LOG_ARCH_STOPPED 74
+#define LOG_ARCH_OFF 75
+
+#ifndef UNIV_NONINL
+#include "log0log.ic"
+#endif
+
+#endif
diff --git a/innobase/include/log0log.ic b/innobase/include/log0log.ic
new file mode 100644
index 00000000000..e5c313d129b
--- /dev/null
+++ b/innobase/include/log0log.ic
@@ -0,0 +1,378 @@
+/******************************************************
+Database log
+
+(c) 1995 Innobase Oy
+
+Created 12/9/1995 Heikki Tuuri
+*******************************************************/
+
+#include "os0file.h"
+#include "mach0data.h"
+#include "mtr0mtr.h"
+
+/**********************************************************
+Checks by parsing that the catenated log segment for a single mtr is
+consistent. */
+
+ibool
+log_check_log_recs(
+/*===============*/
+ byte* buf, /* in: pointer to the start of the log segment
+ in the log_sys->buf log buffer */
+ ulint len, /* in: segment length in bytes */
+ dulint buf_start_lsn); /* in: buffer start lsn */
+
+/****************************************************************
+Gets a log block flush bit. */
+UNIV_INLINE
+ibool
+log_block_get_flush_bit(
+/*====================*/
+ /* out: TRUE if this block was the first
+ to be written in a log flush */
+ byte* log_block) /* in: log block */
+{
+ if (LOG_BLOCK_FLUSH_BIT_MASK
+ & mach_read_from_4(log_block + LOG_BLOCK_HDR_NO)) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/****************************************************************
+Sets the log block flush bit. */
+UNIV_INLINE
+void
+log_block_set_flush_bit(
+/*====================*/
+ byte* log_block, /* in: log block */
+ ibool val) /* in: value to set */
+{
+ ulint field;
+
+ field = mach_read_from_4(log_block + LOG_BLOCK_HDR_NO);
+
+ if (val) {
+ field = field | LOG_BLOCK_FLUSH_BIT_MASK;
+ } else {
+ field = field & ~LOG_BLOCK_FLUSH_BIT_MASK;
+ }
+
+ mach_write_to_4(log_block + LOG_BLOCK_HDR_NO, field);
+}
+
+/****************************************************************
+Gets a log block number stored in the header. */
+UNIV_INLINE
+ulint
+log_block_get_hdr_no(
+/*=================*/
+ /* out: log block number stored in the block
+ header */
+ byte* log_block) /* in: log block */
+{
+ return(~LOG_BLOCK_FLUSH_BIT_MASK
+ & mach_read_from_4(log_block + LOG_BLOCK_HDR_NO));
+}
+
+/****************************************************************
+Sets the log block number stored in the header; NOTE that this must be set
+before the flush bit! */
+UNIV_INLINE
+void
+log_block_set_hdr_no(
+/*=================*/
+ byte* log_block, /* in: log block */
+ ulint n) /* in: log block number: must be > 0 and
+ < LOG_BLOCK_FLUSH_BIT_MASK */
+{
+ ut_ad(n > 0);
+ ut_ad(n < LOG_BLOCK_FLUSH_BIT_MASK);
+
+ mach_write_to_4(log_block + LOG_BLOCK_HDR_NO, n);
+}
+
+/****************************************************************
+Gets a log block data length. */
+UNIV_INLINE
+ulint
+log_block_get_data_len(
+/*===================*/
+ /* out: log block data length measured as a
+ byte offset from the block start */
+ byte* log_block) /* in: log block */
+{
+ return(mach_read_from_2(log_block + LOG_BLOCK_HDR_DATA_LEN));
+}
+
+/****************************************************************
+Sets the log block data length. */
+UNIV_INLINE
+void
+log_block_set_data_len(
+/*===================*/
+ byte* log_block, /* in: log block */
+ ulint len) /* in: data length */
+{
+ mach_write_to_2(log_block + LOG_BLOCK_HDR_DATA_LEN, len);
+}
+
+/****************************************************************
+Gets a log block first mtr log record group offset. */
+UNIV_INLINE
+ulint
+log_block_get_first_rec_group(
+/*==========================*/
+ /* out: first mtr log record group byte offset
+ from the block start, 0 if none */
+ byte* log_block) /* in: log block */
+{
+ return(mach_read_from_2(log_block + LOG_BLOCK_FIRST_REC_GROUP));
+}
+
+/****************************************************************
+Sets the log block first mtr log record group offset. */
+UNIV_INLINE
+void
+log_block_set_first_rec_group(
+/*==========================*/
+ byte* log_block, /* in: log block */
+ ulint offset) /* in: offset, 0 if none */
+{
+ mach_write_to_2(log_block + LOG_BLOCK_FIRST_REC_GROUP, offset);
+}
+
+/****************************************************************
+Gets a log block checkpoint number field (4 lowest bytes). */
+UNIV_INLINE
+ulint
+log_block_get_checkpoint_no(
+/*========================*/
+ /* out: checkpoint no (4 lowest bytes) */
+ byte* log_block) /* in: log block */
+{
+ return(mach_read_from_4(log_block + LOG_BLOCK_CHECKPOINT_NO));
+}
+
+/****************************************************************
+Sets a log block checkpoint number field (4 lowest bytes). */
+UNIV_INLINE
+void
+log_block_set_checkpoint_no(
+/*========================*/
+ byte* log_block, /* in: log block */
+ dulint no) /* in: checkpoint no */
+{
+ mach_write_to_4(log_block + LOG_BLOCK_CHECKPOINT_NO,
+ ut_dulint_get_low(no));
+}
+
+/****************************************************************
+Gets a log block number stored in the trailer. */
+UNIV_INLINE
+ulint
+log_block_get_trl_no(
+/*=================*/
+ /* out: log block number stored in the block
+ trailer */
+ byte* log_block) /* in: log block */
+{
+ return(mach_read_from_4(log_block + OS_FILE_LOG_BLOCK_SIZE
+ - LOG_BLOCK_TRL_NO));
+}
+
+/****************************************************************
+Sets the log block number stored in the trailer. */
+UNIV_INLINE
+void
+log_block_set_trl_no(
+/*=================*/
+ byte* log_block, /* in: log block */
+ ulint n) /* in: log block number */
+{
+ mach_write_to_4(log_block + OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_NO,
+ n);
+}
+
+/****************************************************************
+Converts a lsn to a log block number. */
+UNIV_INLINE
+ulint
+log_block_convert_lsn_to_no(
+/*========================*/
+ /* out: log block number, it is > 0 and <= 1G */
+ dulint lsn) /* in: lsn of a byte within the block */
+{
+ ulint no;
+
+ no = ut_dulint_get_low(lsn) / OS_FILE_LOG_BLOCK_SIZE;
+ no += (ut_dulint_get_high(lsn) % OS_FILE_LOG_BLOCK_SIZE)
+ * 2 * (0x80000000 / OS_FILE_LOG_BLOCK_SIZE);
+
+ no = no & 0x3FFFFFFF;
+
+ return(no + 1);
+}
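
The conversion above packs an lsn into a block number that wraps around at 2^30 and is never zero. A standalone sketch of the same computation, with the dulint represented as two 32-bit halves and OS_FILE_LOG_BLOCK_SIZE assumed to be 512:

#include <stdio.h>

#define OS_FILE_LOG_BLOCK_SIZE	512UL	/* assumed */

static unsigned long lsn_to_block_no(unsigned long high, unsigned long low)
{
	unsigned long no;

	no = low / OS_FILE_LOG_BLOCK_SIZE;
	no += (high % OS_FILE_LOG_BLOCK_SIZE)
		* 2 * (0x80000000UL / OS_FILE_LOG_BLOCK_SIZE);

	/* block numbers wrap around at 2^30 and are always >= 1 */
	return((no & 0x3FFFFFFFUL) + 1);
}

int main(void)
{
	/* a byte 81920 bytes into the log lies in block number 161 */
	printf("%lu\n", lsn_to_block_no(0, 81920));

	return(0);
}
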
+
+/****************************************************************
+Initializes a log block in the log buffer. */
+UNIV_INLINE
+void
+log_block_init(
+/*===========*/
+ byte* log_block, /* in: pointer to the log buffer */
+ dulint lsn) /* in: lsn within the log block */
+{
+ ulint no;
+
+ ut_ad(mutex_own(&(log_sys->mutex)));
+
+ no = log_block_convert_lsn_to_no(lsn);
+
+ log_block_set_hdr_no(log_block, no);
+ log_block_set_trl_no(log_block, no);
+
+ log_block_set_data_len(log_block, LOG_BLOCK_HDR_SIZE);
+ log_block_set_first_rec_group(log_block, 0);
+}
+
+/****************************************************************
+Writes to the log the string given. The log must be released with
+log_release. */
+UNIV_INLINE
+dulint
+log_reserve_and_write_fast(
+/*=======================*/
+ /* out: end lsn of the log record, ut_dulint_zero if
+ did not succeed */
+ byte* str, /* in: string */
+ ulint len, /* in: string length */
+ dulint* start_lsn,/* out: start lsn of the log record */
+ ibool* success)/* out: TRUE if success */
+{
+ log_t* log = log_sys;
+ ulint data_len;
+ dulint lsn;
+
+ *success = TRUE;
+
+ mutex_enter(&(log->mutex));
+
+ data_len = len + log->buf_free % OS_FILE_LOG_BLOCK_SIZE;
+
+ if (log->online_backup_state
+ || (data_len >= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE)) {
+
+		/* The string does not fit within the current log block,
+		the block would become full, or the database is in the
+		online backup state */
+
+ *success = FALSE;
+
+ mutex_exit(&(log->mutex));
+
+ return(ut_dulint_zero);
+ }
+
+ *start_lsn = log->lsn;
+
+ ut_memcpy(log->buf + log->buf_free, str, len);
+
+ log_block_set_data_len(ut_align_down(log->buf + log->buf_free,
+ OS_FILE_LOG_BLOCK_SIZE),
+ data_len);
+#ifdef UNIV_LOG_DEBUG
+ log->old_buf_free = log->buf_free;
+ log->old_lsn = log->lsn;
+#endif
+ log->buf_free += len;
+
+ ut_ad(log->buf_free <= log->buf_size);
+
+ lsn = ut_dulint_add(log->lsn, len);
+
+ log->lsn = lsn;
+
+#ifdef UNIV_LOG_DEBUG
+ log_check_log_recs(log->buf + log->old_buf_free,
+ log->buf_free - log->old_buf_free, log->old_lsn);
+#endif
+ return(lsn);
+}
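
A hypothetical caller sketch for the fast path above (not part of the patch; the helper name write_small_rec is made up). It assumes log0log.h is included and the log system has been initialized; note that on success the log mutex is still owned, so the caller must release it with log_release().

dulint
write_small_rec(
/*============*/
			/* out: end lsn of the record, ut_dulint_zero if
			the fast path could not be used */
	byte*	str,	/* in: log record string */
	ulint	len)	/* in: string length */
{
	dulint	start_lsn;
	dulint	end_lsn;
	ibool	success;

	end_lsn = log_reserve_and_write_fast(str, len, &start_lsn, &success);

	if (success) {
		/* the record fitted in the current log block; the log
		mutex is still owned and must be released here */

		log_release();

		return(end_lsn);
	}

	/* a real caller would fall back to the general, slower log
	write path at this point */

	return(ut_dulint_zero);
}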
+
+/***************************************************************************
+Releases the log mutex. */
+UNIV_INLINE
+void
+log_release(void)
+/*=============*/
+{
+ mutex_exit(&(log_sys->mutex));
+}
+
+/****************************************************************
+Gets the current lsn. */
+UNIV_INLINE
+dulint
+log_get_lsn(void)
+/*=============*/
+ /* out: current lsn */
+{
+ dulint lsn;
+
+ mutex_enter(&(log_sys->mutex));
+
+ lsn = log_sys->lsn;
+
+ mutex_exit(&(log_sys->mutex));
+
+ return(lsn);
+}
+
+/***************************************************************************
+Checks if there is need for a log buffer flush or a new checkpoint, and does
+this if yes. Any database operation should call this when it has modified
+more than about 4 pages. NOTE that this function may only be called when the
+OS thread owns no synchronization objects except the dictionary mutex. */
+UNIV_INLINE
+void
+log_free_check(void)
+/*================*/
+{
+ /* ut_ad(sync_thread_levels_empty()); */
+
+ if (log_sys->check_flush_or_checkpoint) {
+
+ log_check_margins();
+ }
+}
+
+/****************************************************************************
+Gets the online backup lsn. */
+UNIV_INLINE
+dulint
+log_get_online_backup_lsn_low(void)
+/*===============================*/
+ /* out: online_backup_lsn, the caller must
+ own the log_sys mutex */
+{
+ ut_ad(mutex_own(&(log_sys->mutex)));
+ ut_ad(log_sys->online_backup_state);
+
+ return(log_sys->online_backup_lsn);
+}
+
+/****************************************************************************
+Gets the online backup state. */
+UNIV_INLINE
+ibool
+log_get_online_backup_state_low(void)
+/*=================================*/
+ /* out: online backup state, the caller must
+ own the log_sys mutex */
+{
+ ut_ad(mutex_own(&(log_sys->mutex)));
+
+ return(log_sys->online_backup_state);
+}
diff --git a/innobase/include/log0recv.h b/innobase/include/log0recv.h
new file mode 100644
index 00000000000..51f14393d38
--- /dev/null
+++ b/innobase/include/log0recv.h
@@ -0,0 +1,284 @@
+/******************************************************
+Recovery
+
+(c) 1997 Innobase Oy
+
+Created 9/20/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef log0recv_h
+#define log0recv_h
+
+#include "univ.i"
+#include "ut0byte.h"
+#include "page0types.h"
+#include "hash0hash.h"
+#include "log0log.h"
+
+/***********************************************************************
+Returns TRUE if recovery is currently running. */
+UNIV_INLINE
+ibool
+recv_recovery_is_on(void);
+/*=====================*/
+/***********************************************************************
+Returns TRUE if recovery from backup is currently running. */
+UNIV_INLINE
+ibool
+recv_recovery_from_backup_is_on(void);
+/*=================================*/
+/****************************************************************************
+Applies the hashed log records to the page, if the page lsn is less than the
+lsn of a log record. This can be called when a buffer page has just been
+read in, or also for a page already in the buffer pool. */
+
+void
+recv_recover_page(
+/*==============*/
+ ibool just_read_in, /* in: TRUE if the i/o-handler calls this for
+ a freshly read page */
+ page_t* page, /* in: buffer page */
+ ulint space, /* in: space id */
+ ulint page_no); /* in: page number */
+/************************************************************
+Recovers from a checkpoint. When this function returns, the database is able
+to start processing of new user transactions, but the function
+recv_recovery_from_checkpoint_finish should be called later to complete
+the recovery and free the resources used in it. */
+
+ulint
+recv_recovery_from_checkpoint_start(
+/*================================*/
+ /* out: error code or DB_SUCCESS */
+ ulint type, /* in: LOG_CHECKPOINT or LOG_ARCHIVE */
+ dulint limit_lsn, /* in: recover up to this lsn if possible */
+ dulint min_flushed_lsn,/* in: min flushed lsn from data files */
+ dulint max_flushed_lsn);/* in: max flushed lsn from data files */
+/************************************************************
+Completes recovery from a checkpoint. */
+
+void
+recv_recovery_from_checkpoint_finish(void);
+/*======================================*/
+/***********************************************************
+Scans log from a buffer and stores new log data to the parsing buffer. Parses
+and hashes the log records if new data found. */
+
+ibool
+recv_scan_log_recs(
+/*===============*/
+ /* out: TRUE if limit_lsn has been reached, or
+ not able to scan any more in this log group */
+ ibool store_to_hash, /* in: TRUE if the records should be stored
+ to the hash table; this is set FALSE if just
+ debug checking is needed */
+ byte* buf, /* in: buffer containing a log segment or
+ garbage */
+ ulint len, /* in: buffer length */
+ dulint start_lsn, /* in: buffer start lsn */
+ dulint* contiguous_lsn, /* in/out: it is known that all log groups
+ contain contiguous log data up to this lsn */
+ dulint* group_scanned_lsn);/* out: scanning succeeded up to this lsn */
+/**********************************************************
+Resets the logs. The contents of log files will be lost! */
+
+void
+recv_reset_logs(
+/*============*/
+ dulint lsn, /* in: reset to this lsn rounded up to
+ be divisible by OS_FILE_LOG_BLOCK_SIZE,
+ after which we add LOG_BLOCK_HDR_SIZE */
+ ulint arch_log_no, /* in: next archived log file number */
+ ibool new_logs_created);/* in: TRUE if resetting logs is done
+ at the log creation; FALSE if it is done
+ after archive recovery */
+/************************************************************
+Creates the recovery system. */
+
+void
+recv_sys_create(void);
+/*=================*/
+/************************************************************
+Inits the recovery system for a recovery operation. */
+
+void
+recv_sys_init(void);
+/*===============*/
+/***********************************************************************
+Empties the hash table of stored log records, applying them to appropriate
+pages. */
+
+void
+recv_apply_hashed_log_recs(
+/*=======================*/
+ ibool allow_ibuf); /* in: if TRUE, also ibuf operations are
+ allowed during the application; if FALSE,
+ no ibuf operations are allowed, and after
+ the application all file pages are flushed to
+ disk and invalidated in buffer pool: this
+ alternative means that no new log records
+ can be generated during the application */
+/************************************************************
+Recovers from archived log files, and also from log files, if they exist. */
+
+ulint
+recv_recovery_from_archive_start(
+/*=============================*/
+ /* out: error code or DB_SUCCESS */
+ dulint min_flushed_lsn,/* in: min flushed lsn field from the
+ data files */
+ dulint limit_lsn, /* in: recover up to this lsn if possible */
+ ulint first_log_no); /* in: number of the first archived log file
+ to use in the recovery; the file will be
+ searched from INNOBASE_LOG_ARCH_DIR specified
+ in server config file */
+/************************************************************
+Completes recovery from archive. */
+
+void
+recv_recovery_from_archive_finish(void);
+/*===================================*/
+/***********************************************************************
+Checks that a replica of a space is identical to the original space. */
+
+void
+recv_compare_spaces(
+/*================*/
+ ulint space1, /* in: space id */
+ ulint space2, /* in: space id */
+ ulint n_pages);/* in: number of pages */
+/***********************************************************************
+Checks that a replica of a space is identical to the original space. Disables
+ibuf operations and flushes and invalidates the buffer pool pages after the
+test. This function can be used to check the recovery before dict or trx
+systems are initialized. */
+
+void
+recv_compare_spaces_low(
+/*====================*/
+ ulint space1, /* in: space id */
+ ulint space2, /* in: space id */
+ ulint n_pages);/* in: number of pages */
+
+/* Block of log record data */
+typedef struct recv_data_struct recv_data_t;
+struct recv_data_struct{
+ recv_data_t* next; /* pointer to the next block or NULL */
+ /* the log record data is stored physically
+ immediately after this struct, max amount
+ RECV_DATA_BLOCK_SIZE bytes of it */
+};
+
+/* Stored log record struct */
+typedef struct recv_struct recv_t;
+struct recv_struct{
+ byte type; /* log record type */
+ ulint len; /* log record body length in bytes */
+ recv_data_t* data; /* chain of blocks containing the log record
+ body */
+ dulint start_lsn;/* start lsn of the log segment written by
+ the mtr which generated this log record: NOTE
+ that this is not necessarily the start lsn of
+ this log record */
+ dulint end_lsn;/* end lsn of the log segment written by
+ the mtr which generated this log record: NOTE
+ that this is not necessarily the end lsn of
+ this log record */
+ UT_LIST_NODE_T(recv_t)
+ rec_list;/* list of log records for this page */
+};
+
+/* Hashed page file address struct */
+typedef struct recv_addr_struct recv_addr_t;
+struct recv_addr_struct{
+	ulint		state;	/* RECV_NOT_PROCESSED, RECV_BEING_READ,
+				RECV_BEING_PROCESSED, or RECV_PROCESSED */
+ ulint space; /* space id */
+ ulint page_no;/* page number */
+ UT_LIST_BASE_NODE_T(recv_t)
+ rec_list;/* list of log records for this page */
+ hash_node_t addr_hash;
+};
+
+/* Recovery system data structure */
+typedef struct recv_sys_struct recv_sys_t;
+struct recv_sys_struct{
+ mutex_t mutex; /* mutex protecting the fields apply_log_recs,
+ n_addrs, and the state field in each recv_addr
+ struct */
+ ibool apply_log_recs;
+ /* this is TRUE when log rec application to
+ pages is allowed; this flag tells the
+ i/o-handler if it should do log record
+ application */
+ ibool apply_batch_on;
+ /* this is TRUE when a log rec application
+ batch is running */
+ dulint lsn; /* log sequence number */
+ ulint last_log_buf_size;
+ /* size of the log buffer when the database
+ last time wrote to the log */
+ byte* last_block;
+ /* possible incomplete last recovered log
+ block */
+ byte* last_block_buf_start;
+ /* the nonaligned start address of the
+ preceding buffer */
+ byte* buf; /* buffer for parsing log records */
+ ulint len; /* amount of data in buf */
+ dulint parse_start_lsn;
+ /* this is the lsn from which we were able to
+ start parsing log records and adding them to
+ the hash table; ut_dulint_zero if a suitable
+ start point not found yet */
+ dulint scanned_lsn;
+ /* the log data has been scanned up to this
+ lsn */
+ ulint scanned_checkpoint_no;
+ /* the log data has been scanned up to this
+ checkpoint number (lowest 4 bytes) */
+ ulint recovered_offset;
+ /* start offset of non-parsed log records in
+ buf */
+ dulint recovered_lsn;
+ /* the log records have been parsed up to
+ this lsn */
+ dulint limit_lsn;/* recovery should be made at most up to this
+ lsn */
+ log_group_t* archive_group;
+ /* in archive recovery: the log group whose
+ archive is read */
+ mem_heap_t* heap; /* memory heap of log records and file
+ addresses*/
+ hash_table_t* addr_hash;/* hash table of file addresses of pages */
+ ulint n_addrs;/* number of not processed hashed file
+ addresses in the hash table */
+};
+
+extern recv_sys_t* recv_sys;
+extern ibool recv_recovery_on;
+extern ibool recv_no_ibuf_operations;
+
+/* States of recv_addr_struct */
+#define RECV_NOT_PROCESSED 71
+#define RECV_BEING_READ 72
+#define RECV_BEING_PROCESSED 73
+#define RECV_PROCESSED 74
+
+/* The number which is added to a space id to obtain the replicate space
+in the debug version: spaces with an odd number as the id are replicate
+spaces */
+#define RECV_REPLICA_SPACE_ADD 1
+
+/* This many blocks must be left free in the buffer pool when we scan
+the log and store the scanned log records in the buffer pool: we will
+use these free blocks to read in pages when we start applying the
+log records to the database. */
+
+#define RECV_POOL_N_FREE_BLOCKS (ut_min(256, buf_pool_get_curr_size() / 8))
+
+#ifndef UNIV_NONINL
+#include "log0recv.ic"
+#endif
+
+#endif
diff --git a/innobase/include/log0recv.ic b/innobase/include/log0recv.ic
new file mode 100644
index 00000000000..489641bade2
--- /dev/null
+++ b/innobase/include/log0recv.ic
@@ -0,0 +1,35 @@
+/******************************************************
+Recovery
+
+(c) 1997 Innobase Oy
+
+Created 9/20/1997 Heikki Tuuri
+*******************************************************/
+
+#include "sync0sync.h"
+#include "mem0mem.h"
+#include "log0log.h"
+#include "os0file.h"
+
+extern ibool recv_recovery_from_backup_on;
+
+/***********************************************************************
+Returns TRUE if recovery is currently running. */
+UNIV_INLINE
+ibool
+recv_recovery_is_on(void)
+/*=====================*/
+{
+ return(recv_recovery_on);
+}
+
+/***********************************************************************
+Returns TRUE if recovery from backup is currently running. */
+UNIV_INLINE
+ibool
+recv_recovery_from_backup_is_on(void)
+/*=================================*/
+{
+ return(recv_recovery_from_backup_on);
+}
+
diff --git a/innobase/include/mach0data.h b/innobase/include/mach0data.h
new file mode 100644
index 00000000000..006f55d5f1f
--- /dev/null
+++ b/innobase/include/mach0data.h
@@ -0,0 +1,332 @@
+/**********************************************************************
+Utilities for converting data from the database file
+to the machine format.
+
+(c) 1995 Innobase Oy
+
+Created 11/28/1995 Heikki Tuuri
+***********************************************************************/
+
+#ifndef mach0data_h
+#define mach0data_h
+
+#include "univ.i"
+#include "ut0byte.h"
+
+/* The data and all fields are always stored in a database file
+in the same format: ASCII, big-endian, and so on.
+All data in the files MUST be accessed using the functions in this
+module. */
+
+/***********************************************************
+The following function is used to store data in one byte. */
+UNIV_INLINE
+void
+mach_write_to_1(
+/*============*/
+ byte* b, /* in: pointer to byte where to store */
+ ulint n); /* in: ulint integer to be stored, >= 0, < 256 */
+/************************************************************
+The following function is used to fetch data from one byte. */
+UNIV_INLINE
+ulint
+mach_read_from_1(
+/*=============*/
+ /* out: ulint integer, >= 0, < 256 */
+ byte* b); /* in: pointer to byte */
+/***********************************************************
+The following function is used to store data in two consecutive
+bytes. We store the most significant byte to the lower address. */
+UNIV_INLINE
+void
+mach_write_to_2(
+/*============*/
+ byte* b, /* in: pointer to two bytes where to store */
+ ulint n); /* in: ulint integer to be stored, >= 0, < 64k */
+/************************************************************
+The following function is used to fetch data from two consecutive
+bytes. The most significant byte is at the lowest address. */
+UNIV_INLINE
+ulint
+mach_read_from_2(
+/*=============*/
+ /* out: ulint integer, >= 0, < 64k */
+ byte* b); /* in: pointer to two bytes */
+/***********************************************************
+The following function is used to store data in 3 consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_3(
+/*============*/
+ byte* b, /* in: pointer to 3 bytes where to store */
+ ulint n); /* in: ulint integer to be stored */
+/************************************************************
+The following function is used to fetch data from 3 consecutive
+bytes. The most significant byte is at the lowest address. */
+UNIV_INLINE
+ulint
+mach_read_from_3(
+/*=============*/
+ /* out: ulint integer */
+ byte* b); /* in: pointer to 3 bytes */
+/***********************************************************
+The following function is used to store data in four consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_4(
+/*============*/
+ byte* b, /* in: pointer to four bytes where to store */
+ ulint n); /* in: ulint integer to be stored */
+/************************************************************
+The following function is used to fetch data from 4 consecutive
+bytes. The most significant byte is at the lowest address. */
+UNIV_INLINE
+ulint
+mach_read_from_4(
+/*=============*/
+ /* out: ulint integer */
+ byte* b); /* in: pointer to four bytes */
+/***********************************************************
+The following function is used to store data from a ulint to memory
+in standard order:
+we store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write(
+/*=======*/
+ byte* b, /* in: pointer to sizeof(ulint) bytes where to store */
+ ulint n); /* in: ulint integer to be stored */
+/************************************************************
+The following function is used to fetch data from memory to a ulint.
+The most significant byte is at the lowest address. */
+UNIV_INLINE
+ulint
+mach_read(
+/*======*/
+ /* out: ulint integer */
+ byte* b); /* in: pointer to sizeof(ulint) bytes */
+/*************************************************************
+Writes a ulint in a compressed form. */
+UNIV_INLINE
+ulint
+mach_write_compressed(
+/*==================*/
+ /* out: stored size in bytes */
+ byte* b, /* in: pointer to memory where to store */
+ ulint n); /* in: ulint integer to be stored */
+/*************************************************************
+Returns the size of an ulint when written in the compressed form. */
+UNIV_INLINE
+ulint
+mach_get_compressed_size(
+/*=====================*/
+ /* out: compressed size in bytes */
+ ulint n); /* in: ulint integer to be stored */
+/*************************************************************
+Reads a ulint in a compressed form. */
+UNIV_INLINE
+ulint
+mach_read_compressed(
+/*=================*/
+ /* out: read integer */
+ byte* b); /* in: pointer to memory from where to read */
+/***********************************************************
+The following function is used to store data in 6 consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_6(
+/*============*/
+ byte* b, /* in: pointer to 6 bytes where to store */
+ dulint n); /* in: dulint integer to be stored */
+/************************************************************
+The following function is used to fetch data from 6 consecutive
+bytes. The most significant byte is at the lowest address. */
+UNIV_INLINE
+dulint
+mach_read_from_6(
+/*=============*/
+ /* out: dulint integer */
+ byte* b); /* in: pointer to 6 bytes */
+/***********************************************************
+The following function is used to store data in 7 consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_7(
+/*============*/
+ byte* b, /* in: pointer to 7 bytes where to store */
+ dulint n); /* in: dulint integer to be stored */
+/************************************************************
+The following function is used to fetch data from 7 consecutive
+bytes. The most significant byte is at the lowest address. */
+UNIV_INLINE
+dulint
+mach_read_from_7(
+/*=============*/
+ /* out: dulint integer */
+ byte* b); /* in: pointer to 7 bytes */
+/***********************************************************
+The following function is used to store data in 8 consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_8(
+/*============*/
+ byte* b, /* in: pointer to 8 bytes where to store */
+ dulint n); /* in: dulint integer to be stored */
+/************************************************************
+The following function is used to fetch data from 8 consecutive
+bytes. The most significant byte is at the lowest address. */
+UNIV_INLINE
+dulint
+mach_read_from_8(
+/*=============*/
+ /* out: dulint integer */
+ byte* b); /* in: pointer to 8 bytes */
+/*************************************************************
+Writes a dulint in a compressed form. */
+UNIV_INLINE
+ulint
+mach_dulint_write_compressed(
+/*=========================*/
+ /* out: size in bytes */
+ byte* b, /* in: pointer to memory where to store */
+ dulint n); /* in: dulint integer to be stored */
+/*************************************************************
+Returns the size of a dulint when written in the compressed form. */
+UNIV_INLINE
+ulint
+mach_dulint_get_compressed_size(
+/*============================*/
+ /* out: compressed size in bytes */
+ dulint n); /* in: dulint integer to be stored */
+/*************************************************************
+Reads a dulint in a compressed form. */
+UNIV_INLINE
+dulint
+mach_dulint_read_compressed(
+/*========================*/
+ /* out: read dulint */
+ byte* b); /* in: pointer to memory from where to read */
+/*************************************************************
+Writes a dulint in a compressed form. */
+UNIV_INLINE
+ulint
+mach_dulint_write_much_compressed(
+/*==============================*/
+ /* out: size in bytes */
+ byte* b, /* in: pointer to memory where to store */
+ dulint n); /* in: dulint integer to be stored */
+/*************************************************************
+Returns the size of a dulint when written in the compressed form. */
+UNIV_INLINE
+ulint
+mach_dulint_get_much_compressed_size(
+/*=================================*/
+ /* out: compressed size in bytes */
+ dulint n); /* in: dulint integer to be stored */
+/*************************************************************
+Reads a dulint in a compressed form. */
+UNIV_INLINE
+dulint
+mach_dulint_read_much_compressed(
+/*=============================*/
+ /* out: read dulint */
+ byte* b); /* in: pointer to memory from where to read */
+/*************************************************************
+Reads a ulint in a compressed form if the log record fully contains it. */
+
+byte*
+mach_parse_compressed(
+/*==================*/
+ /* out: pointer to end of the stored field, NULL if
+ not complete */
+ byte* ptr, /* in: pointer to buffer from where to read */
+ byte* end_ptr,/* in: pointer to end of the buffer */
+ ulint* val); /* out: read value */
+/*************************************************************
+Reads a dulint in a compressed form if the log record fully contains it. */
+
+byte*
+mach_dulint_parse_compressed(
+/*=========================*/
+ /* out: pointer to end of the stored field, NULL if
+ not complete */
+ byte* ptr, /* in: pointer to buffer from where to read */
+ byte* end_ptr,/* in: pointer to end of the buffer */
+ dulint* val); /* out: read value */
+/*************************************************************
+Reads a double. It is stored in a little-endian format. */
+UNIV_INLINE
+double
+mach_double_read(
+/*=============*/
+ /* out: double read */
+ byte* b); /* in: pointer to memory from where to read */
+/*************************************************************
+Writes a double. It is stored in a little-endian format. */
+UNIV_INLINE
+void
+mach_double_write(
+/*==============*/
+ byte* b, /* in: pointer to memory where to write */
+ double d); /* in: double */
+/*************************************************************
+Reads a float. It is stored in a little-endian format. */
+UNIV_INLINE
+float
+mach_float_read(
+/*=============*/
+ /* out: float read */
+ byte* b); /* in: pointer to memory from where to read */
+/*************************************************************
+Writes a float. It is stored in a little-endian format. */
+UNIV_INLINE
+void
+mach_float_write(
+/*==============*/
+ byte* b, /* in: pointer to memory where to write */
+ float d); /* in: float */
+/*************************************************************
+Reads a ulint stored in the little-endian format. */
+UNIV_INLINE
+ulint
+mach_read_from_n_little_endian(
+/*===========================*/
+ /* out: unsigned long int */
+ byte* buf, /* in: from where to read */
+ ulint buf_size); /* in: from how many bytes to read */
+/*************************************************************
+Writes a ulint in the little-endian format. */
+UNIV_INLINE
+void
+mach_write_to_n_little_endian(
+/*==========================*/
+ byte* dest, /* in: where to write */
+ ulint dest_size, /* in: into how many bytes to write */
+ ulint n); /* in: unsigned long int to write */
+/*************************************************************
+Reads a ulint stored in the little-endian format. */
+UNIV_INLINE
+ulint
+mach_read_from_2_little_endian(
+/*===========================*/
+ /* out: unsigned long int */
+ byte* buf); /* in: from where to read */
+/*************************************************************
+Writes a ulint in the little-endian format. */
+UNIV_INLINE
+void
+mach_write_to_2_little_endian(
+/*==========================*/
+ byte* dest, /* in: where to write */
+ ulint n); /* in: unsigned long int to write */
+
+#ifndef UNIV_NONINL
+#include "mach0data.ic"
+#endif
+
+#endif
diff --git a/innobase/include/mach0data.ic b/innobase/include/mach0data.ic
new file mode 100644
index 00000000000..6c93cb687a5
--- /dev/null
+++ b/innobase/include/mach0data.ic
@@ -0,0 +1,727 @@
+/**********************************************************************
+Utilities for converting data from the database file
+to the machine format.
+
+(c) 1995 Innobase Oy
+
+Created 11/28/1995 Heikki Tuuri
+***********************************************************************/
+
+/***********************************************************
+The following function is used to store data in one byte. */
+UNIV_INLINE
+void
+mach_write_to_1(
+/*============*/
+ byte* b, /* in: pointer to byte where to store */
+ ulint n) /* in: ulint integer to be stored, >= 0, < 256 */
+{
+ ut_ad(b);
+ ut_ad((n >= 0) && (n <= 0xFF));
+
+ b[0] = (byte)n;
+}
+
+/************************************************************
+The following function is used to fetch data from one byte. */
+UNIV_INLINE
+ulint
+mach_read_from_1(
+/*=============*/
+ /* out: ulint integer, >= 0, < 256 */
+ byte* b) /* in: pointer to byte */
+{
+ ut_ad(b);
+ return((ulint)(b[0]));
+}
+
+/***********************************************************
+The following function is used to store data in two consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_2(
+/*============*/
+ byte* b, /* in: pointer to two bytes where to store */
+ ulint n) /* in: ulint integer to be stored */
+{
+ ut_ad(b);
+ ut_ad(n <= 0xFFFF);
+
+ b[0] = (byte)(n >> 8);
+ b[1] = (byte)(n);
+}
+
+/************************************************************
+The following function is used to fetch data from 2 consecutive
+bytes. The most significant byte is at the lowest address. */
+UNIV_INLINE
+ulint
+mach_read_from_2(
+/*=============*/
+ /* out: ulint integer */
+ byte* b) /* in: pointer to 2 bytes */
+{
+ ut_ad(b);
+ return( ((ulint)(b[0]) << 8)
+ + (ulint)(b[1])
+ );
+}
+
+/***********************************************************
+The following function is used to store data in 3 consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_3(
+/*============*/
+ byte* b, /* in: pointer to 3 bytes where to store */
+ ulint n) /* in: ulint integer to be stored */
+{
+ ut_ad(b);
+ ut_ad(n <= 0xFFFFFF);
+
+ b[0] = (byte)(n >> 16);
+ b[1] = (byte)(n >> 8);
+ b[2] = (byte)(n);
+}
+
+/************************************************************
+The following function is used to fetch data from 3 consecutive
+bytes. The most significant byte is at the lowest address. */
+UNIV_INLINE
+ulint
+mach_read_from_3(
+/*=============*/
+ /* out: ulint integer */
+ byte* b) /* in: pointer to 3 bytes */
+{
+ ut_ad(b);
+ return( ((ulint)(b[0]) << 16)
+ + ((ulint)(b[1]) << 8)
+ + (ulint)(b[2])
+ );
+}
+
+/***********************************************************
+The following function is used to store data in four consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_4(
+/*============*/
+ byte* b, /* in: pointer to four bytes where to store */
+ ulint n) /* in: ulint integer to be stored */
+{
+ ut_ad(b);
+
+#if notdefined && !defined(__STDC__) && defined(UNIV_INTEL) && (UNIV_WORD_SIZE == 4) && defined(UNIV_VISUALC)
+
+ /* We do not use this even on Intel, because unaligned accesses may
+ be slow */
+
+ __asm MOV EAX, n
+ __asm BSWAP EAX /* Intel is little-endian, must swap bytes */
+ __asm MOV n, EAX
+
+ *((ulint*)b) = n;
+#else
+ b[0] = (byte)(n >> 24);
+ b[1] = (byte)(n >> 16);
+ b[2] = (byte)(n >> 8);
+ b[3] = (byte)n;
+#endif
+}
+
+/************************************************************
+The following function is used to fetch data from 4 consecutive
+bytes. The most significant byte is at the lowest address. */
+UNIV_INLINE
+ulint
+mach_read_from_4(
+/*=============*/
+ /* out: ulint integer */
+ byte* b) /* in: pointer to four bytes */
+{
+#if notdefined && !defined(__STDC__) && defined(UNIV_INTEL) && (UNIV_WORD_SIZE == 4) && defined(UNIV_VISUALC)
+ /* We do not use this even on Intel, because unaligned accesses may
+ be slow */
+
+ ulint res;
+
+ ut_ad(b);
+
+ __asm MOV EDX, b
+ __asm MOV ECX, DWORD PTR [EDX]
+ __asm BSWAP ECX /* Intel is little-endian, must swap bytes */
+ __asm MOV res, ECX
+
+ return(res);
+#else
+ ut_ad(b);
+ return( ((ulint)(b[0]) << 24)
+ + ((ulint)(b[1]) << 16)
+ + ((ulint)(b[2]) << 8)
+ + (ulint)(b[3])
+ );
+#endif
+}
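
For example, mach_write_to_4 stores n = 0x12345678 as the byte sequence 12 34 56 78, most significant byte at the lowest address, and mach_read_from_4 reassembles the same value from those four bytes.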
+
+/***********************************************************
+The following function is used to store data from a ulint to memory
+in standard order: we store the most significant byte to the lowest
+address. */
+UNIV_INLINE
+void
+mach_write(
+/*=======*/
+ byte* b, /* in: pointer to 4 bytes where to store */
+ ulint n) /* in: ulint integer to be stored */
+{
+ ut_ad(b);
+
+ b[0] = (byte)(n >> 24);
+ b[1] = (byte)(n >> 16);
+ b[2] = (byte)(n >> 8);
+ b[3] = (byte)n;
+}
+
+/************************************************************
+The following function is used to fetch data from memory to a ulint.
+The most significant byte is at the lowest address. */
+UNIV_INLINE
+ulint
+mach_read(
+/*======*/
+ /* out: ulint integer */
+ byte* b) /* in: pointer to 4 bytes */
+{
+ ut_ad(b);
+
+ return( ((ulint)(b[0]) << 24)
+ + ((ulint)(b[1]) << 16)
+ + ((ulint)(b[2]) << 8)
+ + (ulint)(b[3])
+ );
+}
+
+/*************************************************************
+Writes a ulint in a compressed form where the first byte codes the
+length of the stored ulint. We look at the most significant bits of
+the byte. If the most significant bit is zero, it means 1-byte storage,
+else if the 2nd bit is 0, it means 2-byte storage, else if 3rd is 0,
+it means 3-byte storage, else if 4th is 0, it means 4-byte storage,
+else the storage is 5-byte. */
+UNIV_INLINE
+ulint
+mach_write_compressed(
+/*==================*/
+ /* out: compressed size in bytes */
+ byte* b, /* in: pointer to memory where to store */
+ ulint n) /* in: ulint integer (< 2^32) to be stored */
+{
+ ut_ad(b);
+
+ if (n < 0x80) {
+ mach_write_to_1(b, n);
+ return(1);
+ } else if (n < 0x4000) {
+ mach_write_to_2(b, n | 0x8000);
+ return(2);
+ } else if (n < 0x200000) {
+ mach_write_to_3(b, n | 0xC00000);
+ return(3);
+ } else if (n < 0x10000000) {
+ mach_write_to_4(b, n | 0xE0000000);
+ return(4);
+ } else {
+ mach_write_to_1(b, 0xF0);
+ mach_write_to_4(b + 1, n);
+ return(5);
+ }
+}
+
+/*************************************************************
+Returns the size of a ulint when written in the compressed form. */
+UNIV_INLINE
+ulint
+mach_get_compressed_size(
+/*=====================*/
+ /* out: compressed size in bytes */
+ ulint n) /* in: ulint integer (< 2^32) to be stored */
+{
+ if (n < 0x80) {
+ return(1);
+ } else if (n < 0x4000) {
+ return(2);
+ } else if (n < 0x200000) {
+ return(3);
+ } else if (n < 0x10000000) {
+ return(4);
+ } else {
+ return(5);
+ }
+}
+
+/*************************************************************
+Reads a ulint in a compressed form. */
+UNIV_INLINE
+ulint
+mach_read_compressed(
+/*=================*/
+ /* out: read integer (< 2^32) */
+ byte* b) /* in: pointer to memory from where to read */
+{
+ ulint flag;
+
+ ut_ad(b);
+
+ flag = mach_read_from_1(b);
+
+ if (flag < 0x80) {
+ return(flag);
+ } else if (flag < 0xC0) {
+ return(mach_read_from_2(b) & 0x7FFF);
+ } else if (flag < 0xE0) {
+ return(mach_read_from_3(b) & 0x3FFFFF);
+ } else if (flag < 0xF0) {
+ return(mach_read_from_4(b) & 0x1FFFFFFF);
+ } else {
+ ut_ad(flag == 0xF0);
+ return(mach_read_from_4(b + 1));
+ }
+}
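
A standalone illustration of the compressed format described above (not part of the patch; it uses plain C types instead of ulint and byte so that it compiles on its own). The value 70000 (0x11170) falls in the 3-byte range, so it is stored as the bytes C1 11 70:

#include <assert.h>

int
main(void)
{
	unsigned char	b[5];
	unsigned long	n = 70000UL;	/* 0x11170: needs 3-byte storage */
	unsigned long	v;

	/* write: 0x4000 <= n < 0x200000, so store (n | 0xC00000) in three
	bytes, most significant byte first */

	b[0] = (unsigned char)((n | 0xC00000UL) >> 16);	/* 0xC1 */
	b[1] = (unsigned char)(n >> 8);			/* 0x11 */
	b[2] = (unsigned char)(n);			/* 0x70 */

	/* read: the flag byte is in [0xC0, 0xE0), so read three bytes and
	mask off the two flag bits */

	v = (((unsigned long)b[0] << 16)
	     + ((unsigned long)b[1] << 8)
	     + (unsigned long)b[2]) & 0x3FFFFFUL;

	assert(v == n);

	return(0);
}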
+
+/***********************************************************
+The following function is used to store data in 8 consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_8(
+/*============*/
+ byte* b, /* in: pointer to 8 bytes where to store */
+ dulint n) /* in: dulint integer to be stored */
+{
+ ut_ad(b);
+
+ mach_write_to_4(b, ut_dulint_get_high(n));
+ mach_write_to_4(b + 4, ut_dulint_get_low(n));
+}
+
+/************************************************************
+The following function is used to fetch data from 8 consecutive
+bytes. The most significant byte is at the lowest address. */
+UNIV_INLINE
+dulint
+mach_read_from_8(
+/*=============*/
+ /* out: dulint integer */
+ byte* b) /* in: pointer to 8 bytes */
+{
+ ulint high;
+ ulint low;
+
+ ut_ad(b);
+
+ high = mach_read_from_4(b);
+ low = mach_read_from_4(b + 4);
+
+ return(ut_dulint_create(high, low));
+}
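
For example, a dulint with high word 0x00000001 and low word 0xDEADBEEF is stored by mach_write_to_8 as the bytes 00 00 00 01 DE AD BE EF, and mach_read_from_8 rebuilds the same dulint from them.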
+
+/***********************************************************
+The following function is used to store data in 7 consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_7(
+/*============*/
+ byte* b, /* in: pointer to 7 bytes where to store */
+ dulint n) /* in: dulint integer to be stored */
+{
+ ut_ad(b);
+
+ mach_write_to_3(b, ut_dulint_get_high(n));
+ mach_write_to_4(b + 3, ut_dulint_get_low(n));
+}
+
+/************************************************************
+The following function is used to fetch data from 7 consecutive
+bytes. The most significant byte is at the lowest address. */
+UNIV_INLINE
+dulint
+mach_read_from_7(
+/*=============*/
+ /* out: dulint integer */
+ byte* b) /* in: pointer to 7 bytes */
+{
+ ulint high;
+ ulint low;
+
+ ut_ad(b);
+
+ high = mach_read_from_3(b);
+ low = mach_read_from_4(b + 3);
+
+ return(ut_dulint_create(high, low));
+}
+
+/***********************************************************
+The following function is used to store data in 6 consecutive
+bytes. We store the most significant byte to the lowest address. */
+UNIV_INLINE
+void
+mach_write_to_6(
+/*============*/
+ byte* b, /* in: pointer to 6 bytes where to store */
+ dulint n) /* in: dulint integer to be stored */
+{
+ ut_ad(b);
+
+ mach_write_to_2(b, ut_dulint_get_high(n));
+ mach_write_to_4(b + 2, ut_dulint_get_low(n));
+}
+
+/************************************************************
+The following function is used to fetch data from 6 consecutive
+bytes. The most significant byte is at the lowest address. */
+UNIV_INLINE
+dulint
+mach_read_from_6(
+/*=============*/
+ /* out: dulint integer */
+	byte*	b)	/* in: pointer to 6 bytes */
+{
+ ulint high;
+ ulint low;
+
+ ut_ad(b);
+
+ high = mach_read_from_2(b);
+ low = mach_read_from_4(b + 2);
+
+ return(ut_dulint_create(high, low));
+}
+
+/*************************************************************
+Writes a dulint in a compressed form. */
+UNIV_INLINE
+ulint
+mach_dulint_write_compressed(
+/*=========================*/
+ /* out: size in bytes */
+ byte* b, /* in: pointer to memory where to store */
+ dulint n) /* in: dulint integer to be stored */
+{
+ ulint size;
+
+ ut_ad(b);
+
+ size = mach_write_compressed(b, ut_dulint_get_high(n));
+ mach_write_to_4(b + size, ut_dulint_get_low(n));
+
+ return(size + 4);
+}
+
+/*************************************************************
+Returns the size of a dulint when written in the compressed form. */
+UNIV_INLINE
+ulint
+mach_dulint_get_compressed_size(
+/*============================*/
+ /* out: compressed size in bytes */
+ dulint n) /* in: dulint integer to be stored */
+{
+ return(4 + mach_get_compressed_size(ut_dulint_get_high(n)));
+}
+
+/*************************************************************
+Reads a dulint in a compressed form. */
+UNIV_INLINE
+dulint
+mach_dulint_read_compressed(
+/*========================*/
+ /* out: read dulint */
+ byte* b) /* in: pointer to memory from where to read */
+{
+ ulint high;
+ ulint low;
+ ulint size;
+
+ ut_ad(b);
+
+ high = mach_read_compressed(b);
+
+ size = mach_get_compressed_size(high);
+
+ low = mach_read_from_4(b + size);
+
+ return(ut_dulint_create(high, low));
+}
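
A worked example of the compressed dulint form above: the high word is stored in the compressed format and the low word always takes a full four bytes, so a dulint with high word 0 and low word 0x12345678 occupies 1 + 4 = 5 bytes: 00 12 34 56 78.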
+
+/*************************************************************
+Writes a dulint in a compressed form. */
+UNIV_INLINE
+ulint
+mach_dulint_write_much_compressed(
+/*==============================*/
+ /* out: size in bytes */
+ byte* b, /* in: pointer to memory where to store */
+ dulint n) /* in: dulint integer to be stored */
+{
+ ulint size;
+
+ ut_ad(b);
+
+ if (ut_dulint_get_high(n) == 0) {
+ return(mach_write_compressed(b, ut_dulint_get_low(n)));
+ }
+
+ *b = 0xFF;
+ size = 1 + mach_write_compressed(b + 1, ut_dulint_get_high(n));
+
+ size += mach_write_compressed(b + size, ut_dulint_get_low(n));
+
+ return(size);
+}
+
+/*************************************************************
+Returns the size of a dulint when written in the compressed form. */
+UNIV_INLINE
+ulint
+mach_dulint_get_much_compressed_size(
+/*=================================*/
+ /* out: compressed size in bytes */
+ dulint n) /* in: dulint integer to be stored */
+{
+ if (0 == ut_dulint_get_high(n)) {
+ return(mach_get_compressed_size(ut_dulint_get_low(n)));
+ }
+
+ return(1 + mach_get_compressed_size(ut_dulint_get_high(n))
+ + mach_get_compressed_size(ut_dulint_get_low(n)));
+}
+
+/*************************************************************
+Reads a dulint in a compressed form. */
+UNIV_INLINE
+dulint
+mach_dulint_read_much_compressed(
+/*=============================*/
+ /* out: read dulint */
+ byte* b) /* in: pointer to memory from where to read */
+{
+ ulint high;
+ ulint low;
+ ulint size;
+
+ ut_ad(b);
+
+ if (*b != 0xFF) {
+ high = 0;
+ size = 0;
+ } else {
+ high = mach_read_compressed(b + 1);
+
+ size = 1 + mach_get_compressed_size(high);
+ }
+
+ low = mach_read_compressed(b + size);
+
+ return(ut_dulint_create(high, low));
+}
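
A worked example of the 'much compressed' form above: for a dulint with high word 5 and low word 0x12345, the high word is nonzero, so the marker byte 0xFF is written first, then the high word compressed into one byte (05), then the low word compressed into three bytes (C1 23 45), five bytes in all: FF 05 C1 23 45. The reader sees the 0xFF marker, decodes the high word, uses its compressed size to locate the low word, and decodes that in turn.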
+
+/*************************************************************
+Reads a double. It is stored in a little-endian format. */
+UNIV_INLINE
+double
+mach_double_read(
+/*=============*/
+ /* out: double read */
+ byte* b) /* in: pointer to memory from where to read */
+{
+ double d;
+ ulint i;
+ byte* ptr;
+
+ ptr = (byte*)&d;
+
+ for (i = 0; i < sizeof(double); i++) {
+#ifdef WORDS_BIGENDIAN
+ ptr[sizeof(double) - i - 1] = b[i];
+#else
+ ptr[i] = b[i];
+#endif
+ }
+
+ return(d);
+}
+
+/*************************************************************
+Writes a double. It is stored in a little-endian format. */
+UNIV_INLINE
+void
+mach_double_write(
+/*==============*/
+ byte* b, /* in: pointer to memory where to write */
+ double d) /* in: double */
+{
+ ulint i;
+ byte* ptr;
+
+ ptr = (byte*)&d;
+
+ for (i = 0; i < sizeof(double); i++) {
+#ifdef WORDS_BIGENDIAN
+ b[i] = ptr[sizeof(double) - i - 1];
+#else
+ b[i] = ptr[i];
+#endif
+ }
+}
+
+/*************************************************************
+Reads a float. It is stored in a little-endian format. */
+UNIV_INLINE
+float
+mach_float_read(
+/*=============*/
+ /* out: float read */
+ byte* b) /* in: pointer to memory from where to read */
+{
+ float d;
+ ulint i;
+ byte* ptr;
+
+ ptr = (byte*)&d;
+
+ for (i = 0; i < sizeof(float); i++) {
+#ifdef WORDS_BIGENDIAN
+ ptr[sizeof(float) - i - 1] = b[i];
+#else
+ ptr[i] = b[i];
+#endif
+ }
+
+ return(d);
+}
+
+/*************************************************************
+Writes a float. It is stored in a little-endian format. */
+UNIV_INLINE
+void
+mach_float_write(
+/*==============*/
+ byte* b, /* in: pointer to memory where to write */
+ float d) /* in: float */
+{
+ ulint i;
+ byte* ptr;
+
+ ptr = (byte*)&d;
+
+ for (i = 0; i < sizeof(float); i++) {
+#ifdef WORDS_BIGENDIAN
+ b[i] = ptr[sizeof(float) - i - 1];
+#else
+ b[i] = ptr[i];
+#endif
+ }
+}
+
+/*************************************************************
+Reads a ulint stored in the little-endian format. */
+UNIV_INLINE
+ulint
+mach_read_from_n_little_endian(
+/*===========================*/
+ /* out: unsigned long int */
+ byte* buf, /* in: from where to read */
+ ulint buf_size) /* in: from how many bytes to read */
+{
+ ulint n = 0;
+ byte* ptr;
+
+ ut_ad(buf_size <= sizeof(ulint));
+ ut_ad(buf_size > 0);
+
+ ptr = buf + buf_size;
+
+ for (;;) {
+ ptr--;
+
+ n = n << 8;
+
+ n += (ulint)(*ptr);
+
+ if (ptr == buf) {
+ break;
+ }
+ }
+
+ return(n);
+}
+
+/*************************************************************
+Writes a ulint in the little-endian format. */
+UNIV_INLINE
+void
+mach_write_to_n_little_endian(
+/*==========================*/
+ byte* dest, /* in: where to write */
+ ulint dest_size, /* in: into how many bytes to write */
+ ulint n) /* in: unsigned long int to write */
+{
+ byte* end;
+
+ ut_ad(dest_size <= sizeof(ulint));
+ ut_ad(dest_size > 0);
+
+ end = dest + dest_size;
+
+ for (;;) {
+ *dest = (byte)(n & 0xFF);
+
+ n = n >> 8;
+
+ dest++;
+
+ if (dest == end) {
+ break;
+ }
+ }
+
+ ut_ad(n == 0);
+}
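
For example, with dest_size = 2 and n = 0x1234, mach_write_to_n_little_endian writes the bytes 34 12 (least significant byte first), and mach_read_from_n_little_endian with buf_size = 2 recovers 0x1234 from them.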
+
+/*************************************************************
+Reads a ulint stored in the little-endian format. */
+UNIV_INLINE
+ulint
+mach_read_from_2_little_endian(
+/*===========================*/
+ /* out: unsigned long int */
+ byte* buf) /* in: from where to read */
+{
+ return((ulint)(*buf) + ((ulint)(*(buf + 1))) * 256);
+}
+
+/*************************************************************
+Writes a ulint in the little-endian format. */
+UNIV_INLINE
+void
+mach_write_to_2_little_endian(
+/*==========================*/
+ byte* dest, /* in: where to write */
+ ulint n) /* in: unsigned long int to write */
+{
+ ut_ad(n < 256 * 256);
+
+ *dest = (byte)(n & 0xFF);
+
+ n = n >> 8;
+ dest++;
+
+ *dest = (byte)(n & 0xFF);
+}
+
diff --git a/innobase/include/makefilewin.i b/innobase/include/makefilewin.i
new file mode 100644
index 00000000000..f756cf2ea3a
--- /dev/null
+++ b/innobase/include/makefilewin.i
@@ -0,0 +1,34 @@
+# File included in all makefiles of the database
+# (c) Innobase Oy 1995 - 2000
+
+CCOM=cl
+
+# Flags for the debug version
+#CFL= -MTd -Za -Zi -W4 -WX -F8192 -D "WIN32"
+#CFLN = -MTd -Zi -W4 -F8192 -D "WIN32"
+#CFLW = -MTd -Zi -W3 -WX -F8192 -D "WIN32"
+#LFL =
+
+# Flags for the fast version
+#CFL= -MT -Zi -Og -O2 -W3 -WX -D "WIN32"
+#CFLN = -MT -Zi -Og -O2 -W3 -D "WIN32"
+#CFLW = -MT -Zi -Og -O2 -W3 -WX -D "WIN32"
+#LFL =
+
+# Flags for the fast debug version
+CFL= -MTd -Zi -W3 -WX -F8192 -D "WIN32"
+CFLN = -MTd -Zi -W3 -F8192 -D "WIN32"
+CFLW = -MTd -Zi -W3 -WX -F8192 -D "WIN32"
+LFL = /link/NODEFAULTLIB:LIBCMT
+
+# Flags for the profiler version
+#CFL= -MT -Zi -Og -O2 -W3 -WX -D "WIN32"
+#CFLN = -MT -Zi -Og -O2 -WX -D "WIN32"
+#CFLW = -MT -Zi -Og -O2 -W3 -WX -D "WIN32"
+#LFL= -link -PROFILE
+
+# Flags for the fast version without debug info (= the production version)
+#CFL= -MT -Og -O2 -G6 -W3 -WX -D "WIN32"
+#CFLN = -MT -Og -O2 -G6 -W3 -D "WIN32"
+#CFLW = -MT -Og -O2 -G6 -W3 -WX -D "WIN32"
+#LFL =
diff --git a/innobase/include/mem0dbg.h b/innobase/include/mem0dbg.h
new file mode 100644
index 00000000000..dda37626198
--- /dev/null
+++ b/innobase/include/mem0dbg.h
@@ -0,0 +1,117 @@
+/******************************************************
+The memory management: the debug code. This is not a compilation module,
+but is included in mem0mem.* !
+
+(c) 1994, 1995 Innobase Oy
+
+Created 6/9/1994 Heikki Tuuri
+*******************************************************/
+
+/* In the debug version each allocated field is surrounded with
+check fields whose sizes are given below */
+
+#define MEM_FIELD_HEADER_SIZE ut_calc_align(2 * sizeof(ulint),\
+ UNIV_MEM_ALIGNMENT)
+#define MEM_FIELD_TRAILER_SIZE sizeof(ulint)
+
+#define MEM_BLOCK_MAGIC_N 764741
+
+/* Space needed when allocating for a user a field of
+length N. The space is allocated only in multiples of
+UNIV_MEM_ALIGNMENT. In the debug version there are also
+check fields at the both ends of the field. */
+#ifdef UNIV_MEM_DEBUG
+#define MEM_SPACE_NEEDED(N) ut_calc_align((N) + MEM_FIELD_HEADER_SIZE\
+ + MEM_FIELD_TRAILER_SIZE,\
+ UNIV_MEM_ALIGNMENT)
+#else
+#define MEM_SPACE_NEEDED(N) ut_calc_align((N), UNIV_MEM_ALIGNMENT)
+#endif
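
For example, assuming UNIV_MEM_ALIGNMENT is 8, a user request of 13 bytes consumes MEM_SPACE_NEEDED(13) = 16 bytes in the non-debug version; in the debug version the header and trailer check fields are added to the 13 bytes before rounding up to the alignment.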
+
+/*******************************************************************
+Checks a memory heap for consistency and prints the contents if requested.
+Outputs the sum of sizes of buffers given to the user (only in
+the debug version), the physical size of the heap and the number of
+blocks in the heap. In case of error returns 0 as sizes and number
+of blocks. */
+
+void
+mem_heap_validate_or_print(
+/*=======================*/
+ mem_heap_t* heap, /* in: memory heap */
+ byte* top, /* in: calculate and validate only until
+ this top pointer in the heap is reached,
+ if this pointer is NULL, ignored */
+ ibool print, /* in: if TRUE, prints the contents
+ of the heap; works only in
+ the debug version */
+ ibool* error, /* out: TRUE if error */
+ ulint* us_size,/* out: allocated memory
+ (for the user) in the heap,
+ if a NULL pointer is passed as this
+ argument, it is ignored; in the
+ non-debug version this is always -1 */
+ ulint* ph_size,/* out: physical size of the heap,
+ if a NULL pointer is passed as this
+ argument, it is ignored */
+ ulint* n_blocks); /* out: number of blocks in the heap,
+ if a NULL pointer is passed as this
+ argument, it is ignored */
+/******************************************************************
+Prints the contents of a memory heap. */
+
+void
+mem_heap_print(
+/*===========*/
+ mem_heap_t* heap); /* in: memory heap */
+/******************************************************************
+Checks that an object is a memory heap (or a block of it) */
+
+ibool
+mem_heap_check(
+/*===========*/
+ /* out: TRUE if ok */
+ mem_heap_t* heap); /* in: memory heap */
+/******************************************************************
+Validates the contents of a memory heap. */
+
+ibool
+mem_heap_validate(
+/*==============*/
+ /* out: TRUE if ok */
+ mem_heap_t* heap); /* in: memory heap */
+/*********************************************************************
+Prints information of dynamic memory usage and currently live
+memory heaps or buffers. Can only be used in the debug version. */
+
+void
+mem_print_info(void);
+/*=================*/
+/*********************************************************************
+Prints information of dynamic memory usage and currently allocated memory
+heaps or buffers since the last ..._print_info or ..._print_new_info. */
+
+void
+mem_print_new_info(void);
+/*====================*/
+/*********************************************************************
+TRUE if no memory is currently allocated. */
+
+ibool
+mem_all_freed(void);
+/*===============*/
+ /* out: TRUE if no heaps exist */
+/*********************************************************************
+Validates the dynamic memory */
+
+ibool
+mem_validate_no_assert(void);
+/*=========================*/
+ /* out: TRUE if error */
+/****************************************************************
+Validates the dynamic memory */
+
+ibool
+mem_validate(void);
+/*===============*/
+ /* out: TRUE if ok */
diff --git a/innobase/include/mem0dbg.ic b/innobase/include/mem0dbg.ic
new file mode 100644
index 00000000000..765e23e747e
--- /dev/null
+++ b/innobase/include/mem0dbg.ic
@@ -0,0 +1,91 @@
+/************************************************************************
+The memory management: the debug code. This is not an independent
+compilation module but is included in mem0mem.*.
+
+(c) 1994, 1995 Innobase Oy
+
+Created 6/8/1994 Heikki Tuuri
+*************************************************************************/
+
+extern mutex_t mem_hash_mutex;
+extern ulint mem_current_allocated_memory;
+
+/**********************************************************************
+Initializes an allocated memory field in the debug version. */
+
+void
+mem_field_init(
+/*===========*/
+ byte* buf, /* in: memory field */
+ ulint n); /* in: how many bytes the user requested */
+/**********************************************************************
+Erases an allocated memory field in the debug version. */
+
+void
+mem_field_erase(
+/*============*/
+ byte* buf, /* in: memory field */
+ ulint n); /* in: how many bytes the user requested */
+/*******************************************************************
+Initializes a buffer to a random combination of hex BA and BE.
+Used to initialize allocated memory. */
+
+void
+mem_init_buf(
+/*=========*/
+ byte* buf, /* in: pointer to buffer */
+ ulint n); /* in: length of buffer */
+/*******************************************************************
+Initializes a buffer to a random combination of hex DE and AD.
+Used to erase freed memory. */
+
+void
+mem_erase_buf(
+/*==========*/
+ byte* buf, /* in: pointer to buffer */
+ ulint n); /* in: length of buffer */
+/*******************************************************************
+Inserts a created memory heap to the hash table of
+current allocated memory heaps.
+Initializes the hash table when first called. */
+
+void
+mem_hash_insert(
+/*============*/
+ mem_heap_t* heap, /* in: the created heap */
+ char* file_name, /* in: file name of creation */
+ ulint line); /* in: line where created */
+/*******************************************************************
+Removes a memory heap (which is going to be freed by the caller)
+from the list of live memory heaps. Returns the size of the heap
+in terms of how much memory in bytes was allocated for the user of
+the heap (not the total space occupied by the heap).
+Also validates the heap.
+NOTE: This function does not free the storage occupied by the
+heap itself, only the node in the list of heaps. */
+
+void
+mem_hash_remove(
+/*============*/
+ mem_heap_t* heap, /* in: the heap to be freed */
+ char* file_name, /* in: file name of freeing */
+ ulint line); /* in: line where freed */
+
+
+void
+mem_field_header_set_len(byte* field, ulint len);
+
+ulint
+mem_field_header_get_len(byte* field);
+
+void
+mem_field_header_set_check(byte* field, ulint check);
+
+ulint
+mem_field_header_get_check(byte* field);
+
+void
+mem_field_trailer_set_check(byte* field, ulint check);
+
+ulint
+mem_field_trailer_get_check(byte* field);
diff --git a/innobase/include/mem0mem.h b/innobase/include/mem0mem.h
new file mode 100644
index 00000000000..a2259a97503
--- /dev/null
+++ b/innobase/include/mem0mem.h
@@ -0,0 +1,350 @@
+/******************************************************
+The memory management
+
+(c) 1994, 1995 Innobase Oy
+
+Created 6/9/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef mem0mem_h
+#define mem0mem_h
+
+#include "univ.i"
+#include "ut0mem.h"
+#include "ut0byte.h"
+#include "ut0ut.h"
+#include "ut0rnd.h"
+#include "sync0sync.h"
+#include "ut0lst.h"
+#include "mach0data.h"
+
+/* -------------------- MEMORY HEAPS ----------------------------- */
+
+/* The info structure stored at the beginning of a heap block */
+typedef struct mem_block_info_struct mem_block_info_t;
+
+/* A block of a memory heap consists of the info structure
+followed by an area of memory */
+typedef mem_block_info_t mem_block_t;
+
+/* A memory heap is a nonempty linear list of memory blocks */
+typedef mem_block_t mem_heap_t;
+
+/* Types of allocation for memory heaps: DYNAMIC means allocation from the
+dynamic memory pool of the C compiler, BUFFER means allocation from the index
+page buffer pool; the latter method is used for very big heaps */
+
+#define MEM_HEAP_DYNAMIC 0 /* the most common type */
+#define MEM_HEAP_BUFFER 1
+#define MEM_HEAP_BTR_SEARCH 2 /* this flag can be ORed to the
+ previous */
+
+/* The following start size is used for the first block in the memory heap if
+the size is not specified, i.e., 0 is given as the parameter in the call of
+create. The standard size is the maximum size of the blocks used for
+allocations of small buffers. */
+
+#define MEM_BLOCK_START_SIZE 64
+#define MEM_BLOCK_STANDARD_SIZE 8192
+
+/* If a memory heap is allowed to grow into the buffer pool, the following
+is the maximum size for a single allocated buffer: */
+#define MEM_MAX_ALLOC_IN_BUF (UNIV_PAGE_SIZE - 200)
+
+/**********************************************************************
+Initializes the memory system. */
+
+void
+mem_init(
+/*=====*/
+ ulint size); /* in: common pool size in bytes */
+/******************************************************************
+Use this macro instead of the corresponding function! Macro for memory
+heap creation. */
+#ifdef UNIV_MEM_DEBUG
+#define mem_heap_create(N) mem_heap_create_func(\
+ (N), NULL, MEM_HEAP_DYNAMIC,\
+ __FILE__, __LINE__)
+#else
+#define mem_heap_create(N) mem_heap_create_func(N, NULL, MEM_HEAP_DYNAMIC)
+#endif
+/******************************************************************
+Use this macro instead of the corresponding function! Macro for memory
+heap creation. */
+#ifdef UNIV_MEM_DEBUG
+#define mem_heap_create_in_buffer(N) mem_heap_create_func(\
+ (N), NULL, MEM_HEAP_BUFFER,\
+ __FILE__, __LINE__)
+#else
+#define mem_heap_create_in_buffer(N) mem_heap_create_func(N, NULL,\
+ MEM_HEAP_BUFFER)
+#endif
+/******************************************************************
+Use this macro instead of the corresponding function! Macro for memory
+heap creation. */
+#ifdef UNIV_MEM_DEBUG
+#define mem_heap_create_in_btr_search(N) mem_heap_create_func(\
+ (N), NULL, MEM_HEAP_BTR_SEARCH |\
+ MEM_HEAP_BUFFER,\
+ __FILE__, __LINE__)
+#else
+#define mem_heap_create_in_btr_search(N) mem_heap_create_func(N, NULL,\
+ MEM_HEAP_BTR_SEARCH | MEM_HEAP_BUFFER)
+#endif
+/******************************************************************
+Use this macro instead of the corresponding function! Macro for fast
+memory heap creation. An initial block of memory B is given by the
+caller, N is its size, and this memory block is not freed by
+mem_heap_free. See the parameter comment in mem_heap_create_func below. */
+#ifdef UNIV_MEM_DEBUG
+#define mem_heap_fast_create(N, B) mem_heap_create_func(\
+ (N), (B), MEM_HEAP_DYNAMIC,\
+ __FILE__, __LINE__)
+#else
+#define mem_heap_fast_create(N, B) mem_heap_create_func(N, (B),\
+ MEM_HEAP_DYNAMIC)
+#endif
+/******************************************************************
+Use this macro instead of the corresponding function! Macro for memory
+heap freeing. */
+#ifdef UNIV_MEM_DEBUG
+#define mem_heap_free(heap) mem_heap_free_func(\
+ (heap), __FILE__, __LINE__)
+#else
+#define mem_heap_free(heap) mem_heap_free_func(heap)
+#endif
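
A hypothetical usage sketch of the heap macros above (not part of the patch; the function name heap_example is made up). It assumes mem0mem.h is included and mem_init() has been called at startup:

void
heap_example(void)
/*==============*/
{
	mem_heap_t*	heap;
	byte*		buf;

	/* create a heap whose first block fits a 100-byte user buffer */
	heap = mem_heap_create(100);

	/* allocate 50 bytes from the heap */
	buf = mem_heap_alloc(heap, 50);

	buf[0] = 0;	/* ... use the buffer ... */

	/* free every allocation made from the heap in one call */
	mem_heap_free(heap);
}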
+/*********************************************************************
+NOTE: Use the corresponding macros instead of this function. Creates a
+memory heap which allocates memory from dynamic space. For debugging
+purposes, takes also the file name and line as argument in the debug
+version. */
+UNIV_INLINE
+mem_heap_t*
+mem_heap_create_func(
+/*=================*/
+ /* out, own: memory heap */
+ ulint n, /* in: desired start block size,
+ this means that a single user buffer
+ of size n will fit in the block,
+ 0 creates a default size block;
+ if init_block is not NULL, n tells
+ its size in bytes */
+ void* init_block, /* in: if very fast creation is
+ wanted, the caller can reserve some
+ memory from its stack, for example,
+					and pass it as the initial block
+ to the heap: then no OS call of malloc
+ is needed at the creation. CAUTION:
+ the caller must make sure the initial
+ block is not unintentionally erased
+ (if allocated in the stack), before
+ the memory heap is explicitly freed. */
+ ulint type /* in: MEM_HEAP_DYNAMIC or MEM_HEAP_BUFFER */
+ #ifdef UNIV_MEM_DEBUG
+ ,char* file_name, /* in: file name where created */
+ ulint line /* in: line where created */
+ #endif
+ );
+/*********************************************************************
+NOTE: Use the corresponding macro instead of this function.
+Frees the space occupied by a memory heap. */
+UNIV_INLINE
+void
+mem_heap_free_func(
+/*===============*/
+ mem_heap_t* heap /* in, own: heap to be freed */
+ #ifdef UNIV_MEM_DEBUG
+ ,char* file_name, /* in: file name where freed */
+ ulint line /* in: line where freed */
+ #endif
+);
+/*******************************************************************
+Allocates n bytes of memory from a memory heap. */
+UNIV_INLINE
+void*
+mem_heap_alloc(
+/*===========*/
+ /* out: allocated storage, NULL if
+ did not succeed */
+ mem_heap_t* heap, /* in: memory heap */
+ ulint n); /* in: number of bytes; if the heap is allowed
+ to grow into the buffer pool, this must be
+ <= MEM_MAX_ALLOC_IN_BUF */
+/*********************************************************************
+Returns a pointer to the heap top. */
+UNIV_INLINE
+byte*
+mem_heap_get_heap_top(
+/*==================*/
+ /* out: pointer to the heap top */
+ mem_heap_t* heap); /* in: memory heap */
+/*********************************************************************
+Frees the space in a memory heap exceeding the pointer given. The
+pointer must have been acquired from mem_heap_get_heap_top. The first
+memory block of the heap is not freed. */
+UNIV_INLINE
+void
+mem_heap_free_heap_top(
+/*===================*/
+ mem_heap_t* heap, /* in: heap from which to free */
+ byte* old_top);/* in: pointer to old top of heap */
+/*********************************************************************
+Empties a memory heap. The first memory block of the heap is not freed. */
+UNIV_INLINE
+void
+mem_heap_empty(
+/*===========*/
+ mem_heap_t* heap); /* in: heap to empty */
+/*********************************************************************
+Returns a pointer to the topmost element in a memory heap.
+The size of the element must be given. */
+UNIV_INLINE
+void*
+mem_heap_get_top(
+/*=============*/
+ /* out: pointer to the topmost element */
+ mem_heap_t* heap, /* in: memory heap */
+ ulint n); /* in: size of the topmost element */
+/*********************************************************************
+Frees the topmost element in a memory heap.
+The size of the element must be given. */
+UNIV_INLINE
+void
+mem_heap_free_top(
+/*==============*/
+ mem_heap_t* heap, /* in: memory heap */
+ ulint n); /* in: size of the topmost element */
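+/*********************************************************************
+Illustrative sketch, not part of the original header: the topmost
+allocation can be read back and popped stack-wise; the 16-byte size is
+arbitrary but must match the size used in the allocation. */
+#if 0
+static void
+mem_heap_top_stack_example(mem_heap_t* heap)
+{
+	void*	elem;
+
+	elem = mem_heap_alloc(heap, 16);
+
+	ut_ad(elem == mem_heap_get_top(heap, 16));
+
+	mem_heap_free_top(heap, 16);	/* pops the 16-byte element */
+}
+#endif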
+/*********************************************************************
+Returns the space in bytes occupied by a memory heap. */
+UNIV_INLINE
+ulint
+mem_heap_get_size(
+/*==============*/
+ mem_heap_t* heap); /* in: heap */
+/******************************************************************
+Use this macro instead of the corresponding function!
+Macro for memory buffer allocation */
+#ifdef UNIV_MEM_DEBUG
+#define mem_alloc(N) mem_alloc_func(\
+ (N), __FILE__, __LINE__)
+#else
+#define mem_alloc(N) mem_alloc_func(N)
+#endif
+/******************************************************************
+Use this macro instead of the corresponding function!
+Macro for memory buffer allocation */
+#ifdef UNIV_MEM_DEBUG
+#define mem_alloc_noninline(N) mem_alloc_func_noninline(\
+ (N), __FILE__, __LINE__)
+#else
+#define mem_alloc_noninline(N) mem_alloc_func_noninline(N)
+#endif
+/*******************************************************************
+NOTE: Use the corresponding macro instead of this function.
+Allocates a single buffer of memory from the dynamic memory of
+the C compiler. Is like malloc of C. The buffer must be freed
+with mem_free. */
+UNIV_INLINE
+void*
+mem_alloc_func(
+/*===========*/
+ /* out, own: free storage, NULL
+ if did not succeed */
+ ulint n /* in: desired number of bytes */
+ #ifdef UNIV_MEM_DEBUG
+ ,char* file_name, /* in: file name where created */
+ ulint line /* in: line where created */
+ #endif
+);
+/*******************************************************************
+NOTE: Use the corresponding macro instead of this function.
+Allocates a single buffer of memory from the dynamic memory of
+the C compiler. Is like malloc of C. The buffer must be freed
+with mem_free. */
+
+void*
+mem_alloc_func_noninline(
+/*=====================*/
+ /* out, own: free storage, NULL if did not
+ succeed */
+ ulint n /* in: desired number of bytes */
+ #ifdef UNIV_MEM_DEBUG
+ ,char* file_name, /* in: file name where created */
+ ulint line /* in: line where created */
+ #endif
+ );
+/******************************************************************
+Use this macro instead of the corresponding function!
+Macro for memory buffer freeing */
+#ifdef UNIV_MEM_DEBUG
+#define mem_free(PTR) mem_free_func(\
+ (PTR), __FILE__, __LINE__)
+#else
+#define mem_free(PTR) mem_free_func(PTR)
+#endif
+/*******************************************************************
+NOTE: Use the corresponding macro instead of this function.
+Frees a single buffer of storage from
+the dynamic memory of C compiler. Similar to free of C. */
+UNIV_INLINE
+void
+mem_free_func(
+/*==========*/
+ void* ptr /* in, own: buffer to be freed */
+ #ifdef UNIV_MEM_DEBUG
+ ,char* file_name, /* in: file name where created */
+ ulint line /* in: line where created */
+ #endif
+);
+/*******************************************************************
+Implements realloc. */
+UNIV_INLINE
+void*
+mem_realloc(
+/*========*/
+ /* out, own: free storage, NULL if did not succeed */
+ void* buf, /* in: pointer to an old buffer */
+ ulint n); /* in: desired number of bytes */
+
+
+/*#######################################################################*/
+
+/* The info header of a block in a memory heap */
+
+struct mem_block_info_struct {
+ UT_LIST_BASE_NODE_T(mem_block_t) base; /* In the first block in the
+ the list this is the base node of the list of blocks;
+ in subsequent blocks this is undefined */
+ UT_LIST_NODE_T(mem_block_t) list; /* This contains pointers to next
+ and prev in the list. The first block allocated
+ to the heap is also the first block in this list,
+ though it also contains the base node of the list. */
+ ulint len; /* physical length of this block in bytes */
+ ulint type; /* type of heap: MEM_HEAP_DYNAMIC, or
+			MEM_HEAP_BUFFER possibly ORed to MEM_HEAP_BTR_SEARCH */
+ ibool init_block; /* TRUE if this is the first block used in fast
+ creation of a heap: the memory will be freed
+ by the creator, not by mem_heap_free */
+ ulint free; /* offset in bytes of the first free position for
+ user data in the block */
+ ulint start; /* the value of the struct field 'free' at the
+ creation of the block */
+ byte* free_block;
+ /* if the MEM_HEAP_BTR_SEARCH bit is set in type,
+ and this is the heap root, this can contain an
+ allocated buffer frame, which can be appended as a
+ free block to the heap, if we need more space;
+ otherwise, this is NULL */
+ ulint magic_n;/* magic number for debugging */
+};
+
+/* Header size for a memory heap block */
+#define MEM_BLOCK_HEADER_SIZE ut_calc_align(sizeof(mem_block_info_t),\
+ UNIV_MEM_ALIGNMENT)
+#include "mem0dbg.h"
+
+#ifndef UNIV_NONINL
+#include "mem0mem.ic"
+#endif
+
+#endif
diff --git a/innobase/include/mem0mem.ic b/innobase/include/mem0mem.ic
new file mode 100644
index 00000000000..8b8449469ef
--- /dev/null
+++ b/innobase/include/mem0mem.ic
@@ -0,0 +1,597 @@
+/************************************************************************
+The memory management
+
+(c) 1994, 1995 Innobase Oy
+
+Created 6/8/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "mem0dbg.ic"
+
+#include "mem0pool.h"
+
+/*******************************************************************
+Creates a memory heap block where data can be allocated. */
+
+mem_block_t*
+mem_heap_create_block(
+/*==================*/
+ /* out, own: memory heap block, NULL if did not
+ succeed */
+ mem_heap_t* heap,/* in: memory heap or NULL if first block should
+ be created */
+ ulint n, /* in: number of bytes needed for user data, or
+ if init_block is not NULL, its size in bytes */
+ void* init_block, /* in: init block in fast create, type must be
+ MEM_HEAP_DYNAMIC */
+ ulint type); /* in: type of heap: MEM_HEAP_DYNAMIC or
+ MEM_HEAP_BUFFER */
+/**********************************************************************
+Frees a block from a memory heap. */
+
+void
+mem_heap_block_free(
+/*================*/
+ mem_heap_t* heap, /* in: heap */
+ mem_block_t* block); /* in: block to free */
+/**********************************************************************
+Frees the free_block field from a memory heap. */
+
+void
+mem_heap_free_block_free(
+/*=====================*/
+ mem_heap_t* heap); /* in: heap */
+/*******************************************************************
+Adds a new block to a memory heap. */
+
+mem_block_t*
+mem_heap_add_block(
+/*===============*/
+ /* out: created block, NULL if did not
+ succeed */
+ mem_heap_t* heap, /* in: memory heap */
+ ulint n); /* in: number of bytes user needs */
+
+UNIV_INLINE
+void
+mem_block_set_len(mem_block_t* block, ulint len)
+{
+ ut_ad(len > 0);
+
+ block->len = len;
+}
+
+UNIV_INLINE
+ulint
+mem_block_get_len(mem_block_t* block)
+{
+ return(block->len);
+}
+
+UNIV_INLINE
+void
+mem_block_set_type(mem_block_t* block, ulint type)
+{
+ ut_ad((type == MEM_HEAP_DYNAMIC) || (type == MEM_HEAP_BUFFER)
+ || (type == MEM_HEAP_BUFFER + MEM_HEAP_BTR_SEARCH));
+
+ block->type = type;
+}
+
+UNIV_INLINE
+ulint
+mem_block_get_type(mem_block_t* block)
+{
+ return(block->type);
+}
+
+UNIV_INLINE
+void
+mem_block_set_free(mem_block_t* block, ulint free)
+{
+ ut_ad(free > 0);
+ ut_ad(free <= mem_block_get_len(block));
+
+ block->free = free;
+}
+
+UNIV_INLINE
+ulint
+mem_block_get_free(mem_block_t* block)
+{
+ return(block->free);
+}
+
+UNIV_INLINE
+void
+mem_block_set_start(mem_block_t* block, ulint start)
+{
+ ut_ad(start > 0);
+
+ block->start = start;
+}
+
+UNIV_INLINE
+ulint
+mem_block_get_start(mem_block_t* block)
+{
+ return(block->start);
+}
+
+/*******************************************************************
+Allocates n bytes of memory from a memory heap. */
+UNIV_INLINE
+void*
+mem_heap_alloc(
+/*===========*/
+ /* out: allocated storage */
+ mem_heap_t* heap, /* in: memory heap */
+ ulint n) /* in: number of bytes; if the heap is allowed
+ to grow into the buffer pool, this must be
+ <= MEM_MAX_ALLOC_IN_BUF */
+{
+ mem_block_t* block;
+ void* buf;
+ ulint free;
+
+ ut_ad(mem_heap_check(heap));
+
+ block = UT_LIST_GET_LAST(heap->base);
+
+ ut_ad(!(block->type & MEM_HEAP_BUFFER) || (n <= MEM_MAX_ALLOC_IN_BUF));
+
+ /* Check if there is enough space in block. If not, create a new
+ block to the heap */
+
+ if (mem_block_get_len(block)
+ < mem_block_get_free(block) + MEM_SPACE_NEEDED(n)) {
+
+ block = mem_heap_add_block(heap, n);
+
+ if (block == NULL) {
+
+ return(NULL);
+ }
+ }
+
+ free = mem_block_get_free(block);
+
+ buf = (byte*)block + free;
+
+ mem_block_set_free(block, free + MEM_SPACE_NEEDED(n));
+
+ #ifdef UNIV_MEM_DEBUG
+
+ /* In the debug version write debugging info to the field */
+ mem_field_init((byte*)buf, n);
+
+ /* Advance buf to point at the storage which will be given to the
+ caller */
+ buf = (byte*)buf + MEM_FIELD_HEADER_SIZE;
+
+ #endif
+
+ return(buf);
+}
+
+/*********************************************************************
+Returns a pointer to the heap top. */
+UNIV_INLINE
+byte*
+mem_heap_get_heap_top(
+/*==================*/
+ /* out: pointer to the heap top */
+ mem_heap_t* heap) /* in: memory heap */
+{
+ mem_block_t* block;
+ byte* buf;
+
+ ut_ad(mem_heap_check(heap));
+
+ block = UT_LIST_GET_LAST(heap->base);
+
+ buf = (byte*)block + mem_block_get_free(block);
+
+ return(buf);
+}
+
+/*********************************************************************
+Frees the space in a memory heap exceeding the pointer given. The
+pointer must have been acquired from mem_heap_get_heap_top. The first
+memory block of the heap is not freed. */
+UNIV_INLINE
+void
+mem_heap_free_heap_top(
+/*===================*/
+ mem_heap_t* heap, /* in: heap from which to free */
+ byte* old_top)/* in: pointer to old top of heap */
+{
+ mem_block_t* block;
+ mem_block_t* prev_block;
+ #ifdef UNIV_MEM_DEBUG
+ ibool error;
+ ulint total_size;
+ ulint size;
+ #endif
+
+ ut_ad(mem_heap_check(heap));
+
+ #ifdef UNIV_MEM_DEBUG
+
+ /* Validate the heap and get its total allocated size */
+ mem_heap_validate_or_print(heap, NULL, FALSE, &error, &total_size,
+ NULL, NULL);
+ ut_a(!error);
+
+ /* Get the size below top pointer */
+ mem_heap_validate_or_print(heap, old_top, FALSE, &error, &size, NULL,
+ NULL);
+ ut_a(!error);
+
+ #endif
+
+ block = UT_LIST_GET_LAST(heap->base);
+
+ while (block != NULL) {
+ if (((byte*)block + mem_block_get_free(block) >= old_top)
+ && ((byte*)block <= old_top)) {
+ /* Found the right block */
+
+ break;
+ }
+
+ /* Store prev_block value before freeing the current block
+ (the current block will be erased in freeing) */
+
+ prev_block = UT_LIST_GET_PREV(list, block);
+
+ mem_heap_block_free(heap, block);
+
+ block = prev_block;
+ }
+
+ ut_ad(block);
+
+ /* Set the free field of block */
+ mem_block_set_free(block, old_top - (byte*)block);
+
+ #ifdef UNIV_MEM_DEBUG
+ ut_ad(mem_block_get_start(block) <= mem_block_get_free(block));
+
+ /* In the debug version erase block from top up */
+
+ mem_erase_buf(old_top, (byte*)block + block->len - old_top);
+
+ /* Update allocated memory count */
+ mutex_enter(&mem_hash_mutex);
+ mem_current_allocated_memory -= (total_size - size);
+ mutex_exit(&mem_hash_mutex);
+
+ #endif
+
+ /* If free == start, we may free the block if it is not the first
+ one */
+
+ if ((heap != block) && (mem_block_get_free(block) ==
+ mem_block_get_start(block))) {
+ mem_heap_block_free(heap, block);
+ }
+}
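+
+/*********************************************************************
+Illustrative sketch, not part of the original file: the heap top acts
+like a savepoint; everything allocated after mem_heap_get_heap_top is
+released by mem_heap_free_heap_top, while older allocations are kept.
+The 64-byte size is arbitrary. */
+#if 0
+static void
+mem_heap_top_usage_example(mem_heap_t* heap)
+{
+	byte*	old_top;
+	void*	tmp;
+
+	old_top = mem_heap_get_heap_top(heap);
+
+	tmp = mem_heap_alloc(heap, 64);
+
+	/* ... use tmp as scratch space ... */
+
+	mem_heap_free_heap_top(heap, old_top);	/* frees tmp and anything
+						allocated after old_top */
+}
+#endif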
+
+/*********************************************************************
+Empties a memory heap. The first memory block of the heap is not freed. */
+UNIV_INLINE
+void
+mem_heap_empty(
+/*===========*/
+ mem_heap_t* heap) /* in: heap to empty */
+{
+ mem_heap_free_heap_top(heap, (byte*)heap + mem_block_get_start(heap));
+
+ if (heap->free_block) {
+ mem_heap_free_block_free(heap);
+ }
+}
+
+/*********************************************************************
+Returns a pointer to the topmost element in a memory heap. The size of the
+element must be given. */
+UNIV_INLINE
+void*
+mem_heap_get_top(
+/*=============*/
+ /* out: pointer to the topmost element */
+ mem_heap_t* heap, /* in: memory heap */
+ ulint n) /* in: size of the topmost element */
+{
+ mem_block_t* block;
+ void* buf;
+
+ ut_ad(mem_heap_check(heap));
+
+ block = UT_LIST_GET_LAST(heap->base);
+
+ buf = (byte*)block + mem_block_get_free(block) - MEM_SPACE_NEEDED(n);
+
+ #ifdef UNIV_MEM_DEBUG
+ ut_ad(mem_block_get_start(block) <=(ulint)((byte*)buf - (byte*)block));
+
+ /* In the debug version, advance buf to point at the storage which
+ was given to the caller in the allocation*/
+
+ buf = (byte*)buf + MEM_FIELD_HEADER_SIZE;
+
+ /* Check that the field lengths agree */
+ ut_ad(n == (ulint)mem_field_header_get_len(buf));
+ #endif
+
+ return(buf);
+}
+
+/*********************************************************************
+Frees the topmost element in a memory heap. The size of the element must be
+given. */
+UNIV_INLINE
+void
+mem_heap_free_top(
+/*==============*/
+ mem_heap_t* heap, /* in: memory heap */
+ ulint n) /* in: size of the topmost element */
+{
+ mem_block_t* block;
+
+ ut_ad(mem_heap_check(heap));
+
+ block = UT_LIST_GET_LAST(heap->base);
+
+ /* Subtract the free field of block */
+ mem_block_set_free(block, mem_block_get_free(block)
+ - MEM_SPACE_NEEDED(n));
+ #ifdef UNIV_MEM_DEBUG
+
+ ut_ad(mem_block_get_start(block) <= mem_block_get_free(block));
+
+ /* In the debug version check the consistency, and erase field */
+ mem_field_erase((byte*)block + mem_block_get_free(block), n);
+ #endif
+
+ /* If free == start, we may free the block if it is not the first
+ one */
+
+ if ((heap != block) && (mem_block_get_free(block) ==
+ mem_block_get_start(block))) {
+ mem_heap_block_free(heap, block);
+ }
+}
+
+/*********************************************************************
+NOTE: Use the corresponding macros instead of this function. Creates a
+memory heap which allocates memory from dynamic space. For debugging
+purposes, takes also the file name and line as argument in the debug
+version. */
+UNIV_INLINE
+mem_heap_t*
+mem_heap_create_func(
+/*=================*/
+ /* out, own: memory heap */
+ ulint n, /* in: desired start block size,
+ this means that a single user buffer
+ of size n will fit in the block,
+ 0 creates a default size block;
+ if init_block is not NULL, n tells
+ its size in bytes */
+ void* init_block, /* in: if very fast creation is
+ wanted, the caller can reserve some
+ memory from its stack, for example,
+				and pass it as the initial block
+ to the heap: then no OS call of malloc
+ is needed at the creation. CAUTION:
+ the caller must make sure the initial
+ block is not unintentionally erased
+ (if allocated in the stack), before
+ the memory heap is explicitly freed. */
+ ulint type /* in: MEM_HEAP_DYNAMIC, or MEM_HEAP_BUFFER
+ possibly ORed to MEM_HEAP_BTR_SEARCH */
+ #ifdef UNIV_MEM_DEBUG
+ ,char* file_name, /* in: file name where created */
+ ulint line /* in: line where created */
+ #endif
+ )
+{
+ mem_block_t* block;
+
+ if (n > 0) {
+ block = mem_heap_create_block(NULL, n, init_block, type);
+ } else {
+ block = mem_heap_create_block(NULL, MEM_BLOCK_START_SIZE,
+ init_block, type);
+ }
+
+ ut_ad(block);
+
+ UT_LIST_INIT(block->base);
+
+ /* Add the created block itself as the first block in the list */
+ UT_LIST_ADD_FIRST(list, block->base, block);
+
+ #ifdef UNIV_MEM_DEBUG
+
+ if (block == NULL) {
+
+ return(block);
+ }
+
+ mem_hash_insert(block, file_name, line);
+
+ #endif
+
+ return(block);
+}
+
+/*********************************************************************
+NOTE: Use the corresponding macro instead of this function. Frees the space
+occupied by a memory heap. In the debug version erases the heap memory
+blocks. */
+UNIV_INLINE
+void
+mem_heap_free_func(
+/*===============*/
+ mem_heap_t* heap /* in, own: heap to be freed */
+ #ifdef UNIV_MEM_DEBUG
+ ,char* file_name, /* in: file name where freed */
+ ulint line /* in: line where freed */
+ #endif
+ )
+{
+ mem_block_t* block;
+ mem_block_t* prev_block;
+
+ ut_ad(mem_heap_check(heap));
+
+ block = UT_LIST_GET_LAST(heap->base);
+
+ #ifdef UNIV_MEM_DEBUG
+
+ /* In the debug version remove the heap from the hash table of heaps
+ and check its consistency */
+
+ mem_hash_remove(heap, file_name, line);
+
+ #endif
+
+ if (heap->free_block) {
+ mem_heap_free_block_free(heap);
+ }
+
+ while (block != NULL) {
+		/* Store the prev_block pointer before freeing the current
+		block (the block contents are erased in freeing) */
+
+ prev_block = UT_LIST_GET_PREV(list, block);
+
+ mem_heap_block_free(heap, block);
+
+ block = prev_block;
+ }
+}
+
+/*******************************************************************
+NOTE: Use the corresponding macro instead of this function.
+Allocates a single buffer of memory from the dynamic memory of
+the C compiler. Is like malloc of C. The buffer must be freed
+with mem_free. */
+UNIV_INLINE
+void*
+mem_alloc_func(
+/*===========*/
+ /* out, own: free storage, NULL if did not
+ succeed */
+ ulint n /* in: desired number of bytes */
+ #ifdef UNIV_MEM_DEBUG
+ ,char* file_name, /* in: file name where created */
+ ulint line /* in: line where created */
+ #endif
+ )
+{
+ #ifndef UNIV_MEM_DEBUG
+
+ return(mem_area_alloc(n, mem_comm_pool));
+
+ #else
+
+ mem_heap_t* heap;
+ void* buf;
+
+ heap = mem_heap_create_func(n, NULL, MEM_HEAP_DYNAMIC, file_name,
+ line);
+ if (heap == NULL) {
+
+ return(NULL);
+ }
+
+ /* Note that as we created the first block in the heap big enough
+ for the buffer requested by the caller, the buffer will be in the
+ first block and thus we can calculate the pointer to the heap from
+ the pointer to the buffer when we free the memory buffer. */
+
+ buf = mem_heap_alloc(heap, n);
+
+ ut_ad((byte*)heap == (byte*)buf - MEM_BLOCK_HEADER_SIZE
+ - MEM_FIELD_HEADER_SIZE);
+ return(buf);
+
+ #endif
+}
+
+/*******************************************************************
+NOTE: Use the corresponding macro instead of this function. Frees a single
+buffer of storage from the dynamic memory of the C compiler. Similar to the
+free of C. */
+UNIV_INLINE
+void
+mem_free_func(
+/*==========*/
+ void* ptr /* in, own: buffer to be freed */
+ #ifdef UNIV_MEM_DEBUG
+ ,char* file_name, /* in: file name where created */
+ ulint line /* in: line where created */
+ #endif
+ )
+{
+ #ifndef UNIV_MEM_DEBUG
+
+ mem_area_free(ptr, mem_comm_pool);
+
+ #else
+
+ mem_heap_t* heap;
+
+ heap = (mem_heap_t*)((byte*)ptr - MEM_BLOCK_HEADER_SIZE
+ - MEM_FIELD_HEADER_SIZE);
+ mem_heap_free_func(heap, file_name, line);
+
+ #endif
+}
+
+/*********************************************************************
+Returns the space in bytes occupied by a memory heap. */
+UNIV_INLINE
+ulint
+mem_heap_get_size(
+/*==============*/
+ mem_heap_t* heap) /* in: heap */
+{
+ mem_block_t* block;
+ ulint size = 0;
+
+ ut_ad(mem_heap_check(heap));
+
+ block = heap;
+
+ while (block != NULL) {
+
+ size += mem_block_get_len(block);
+ block = UT_LIST_GET_NEXT(list, block);
+ }
+
+ if (heap->free_block) {
+ size += UNIV_PAGE_SIZE;
+ }
+
+ return(size);
+}
+
+/*******************************************************************
+Implements realloc. */
+UNIV_INLINE
+void*
+mem_realloc(
+/*========*/
+ /* out, own: free storage, NULL if did not succeed */
+ void* buf, /* in: pointer to an old buffer */
+ ulint n) /* in: desired number of bytes */
+{
+ mem_free(buf);
+
+ return(mem_alloc(n));
+}
diff --git a/innobase/include/mem0pool.h b/innobase/include/mem0pool.h
new file mode 100644
index 00000000000..b6906894c53
--- /dev/null
+++ b/innobase/include/mem0pool.h
@@ -0,0 +1,83 @@
+/******************************************************
+The lowest-level memory management
+
+(c) 1994, 1995 Innobase Oy
+
+Created 6/9/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef mem0pool_h
+#define mem0pool_h
+
+#include "univ.i"
+#include "os0file.h"
+
+typedef struct mem_area_struct mem_area_t;
+typedef struct mem_pool_struct mem_pool_t;
+
+/* The common memory pool */
+extern mem_pool_t* mem_comm_pool;
+
+/* Each memory area takes this many extra bytes for control information */
+#define MEM_AREA_EXTRA_SIZE UNIV_MEM_ALIGNMENT
+
+/************************************************************************
+Creates a memory pool. */
+
+mem_pool_t*
+mem_pool_create(
+/*============*/
+ /* out: memory pool */
+ ulint size); /* in: pool size in bytes */
+/************************************************************************
+Allocates memory from a pool. NOTE: This low-level function should only be
+used in mem0mem.*! */
+
+void*
+mem_area_alloc(
+/*===========*/
+ /* out, own: allocated memory buffer */
+ ulint size, /* in: allocated size in bytes; for optimum
+ space usage, the size should be a power of 2
+ minus MEM_AREA_EXTRA_SIZE */
+ mem_pool_t* pool); /* in: memory pool */
+/************************************************************************
+Frees memory to a pool. */
+
+void
+mem_area_free(
+/*==========*/
+ void* ptr, /* in, own: pointer to allocated memory
+ buffer */
+ mem_pool_t* pool); /* in: memory pool */
+/************************************************************************
+Returns the amount of reserved memory. */
+
+ulint
+mem_pool_get_reserved(
+/*==================*/
+				/* out: reserved memory in bytes */
+ mem_pool_t* pool); /* in: memory pool */
+/************************************************************************
+Validates a memory pool. */
+
+ibool
+mem_pool_validate(
+/*==============*/
+ /* out: TRUE if ok */
+ mem_pool_t* pool); /* in: memory pool */
+/************************************************************************
+Prints info of a memory pool. */
+
+void
+mem_pool_print_info(
+/*================*/
+ FILE* outfile,/* in: output file to write to */
+ mem_pool_t* pool); /* in: memory pool */
+
+
+#ifndef UNIV_NONINL
+#include "mem0pool.ic"
+#endif
+
+#endif
diff --git a/innobase/include/mem0pool.ic b/innobase/include/mem0pool.ic
new file mode 100644
index 00000000000..4e8c08733ed
--- /dev/null
+++ b/innobase/include/mem0pool.ic
@@ -0,0 +1,7 @@
+/************************************************************************
+The lowest-level memory management
+
+(c) 1994, 1995 Innobase Oy
+
+Created 6/8/1994 Heikki Tuuri
+*************************************************************************/
diff --git a/innobase/include/mtr0log.h b/innobase/include/mtr0log.h
new file mode 100644
index 00000000000..acbf87df447
--- /dev/null
+++ b/innobase/include/mtr0log.h
@@ -0,0 +1,178 @@
+/******************************************************
+Mini-transaction logging routines
+
+(c) 1995 Innobase Oy
+
+Created 12/7/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef mtr0log_h
+#define mtr0log_h
+
+#include "univ.i"
+#include "mtr0mtr.h"
+
+/************************************************************
+Writes 1 - 4 bytes to a file page buffered in the buffer pool.
+Writes the corresponding log record to the mini-transaction log. */
+
+void
+mlog_write_ulint(
+/*=============*/
+ byte* ptr, /* in: pointer where to write */
+ ulint val, /* in: value to write */
+ byte type, /* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************
+Writes 8 bytes to a file page buffered in the buffer pool.
+Writes the corresponding log record to the mini-transaction log. */
+
+void
+mlog_write_dulint(
+/*==============*/
+ byte* ptr, /* in: pointer where to write */
+ dulint val, /* in: value to write */
+ byte type, /* in: MLOG_8BYTES */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************
+Writes a string to a file page buffered in the buffer pool. Writes the
+corresponding log record to the mini-transaction log. */
+
+void
+mlog_write_string(
+/*==============*/
+ byte* ptr, /* in: pointer where to write */
+ byte* str, /* in: string to write */
+ ulint len, /* in: string length */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************
+Writes initial part of a log record consisting of one-byte item
+type and four-byte space and page numbers. */
+
+void
+mlog_write_initial_log_record(
+/*==========================*/
+ byte* ptr, /* in: pointer to (inside) a buffer frame
+ holding the file page where modification
+ is made */
+ byte type, /* in: log item type: MLOG_1BYTE, ... */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************
+Catenates 1 - 4 bytes to the mtr log. */
+UNIV_INLINE
+void
+mlog_catenate_ulint(
+/*================*/
+ mtr_t* mtr, /* in: mtr */
+ ulint val, /* in: value to write */
+ ulint type); /* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+/************************************************************
+Catenates n bytes to the mtr log. */
+
+void
+mlog_catenate_string(
+/*=================*/
+ mtr_t* mtr, /* in: mtr */
+ byte* str, /* in: string to write */
+ ulint len); /* in: string length */
+/************************************************************
+Catenates a compressed ulint to mlog. */
+UNIV_INLINE
+void
+mlog_catenate_ulint_compressed(
+/*===========================*/
+ mtr_t* mtr, /* in: mtr */
+ ulint val); /* in: value to write */
+/************************************************************
+Catenates a compressed dulint to mlog. */
+UNIV_INLINE
+void
+mlog_catenate_dulint_compressed(
+/*============================*/
+ mtr_t* mtr, /* in: mtr */
+ dulint val); /* in: value to write */
+/************************************************************
+Opens a buffer to mlog. It must be closed with mlog_close. */
+UNIV_INLINE
+byte*
+mlog_open(
+/*======*/
+ /* out: buffer, NULL if log mode MTR_LOG_NONE */
+ mtr_t* mtr, /* in: mtr */
+ ulint size); /* in: buffer size in bytes */
+/************************************************************
+Closes a buffer opened to mlog. */
+UNIV_INLINE
+void
+mlog_close(
+/*=======*/
+ mtr_t* mtr, /* in: mtr */
+ byte* ptr); /* in: buffer space from ptr up was not used */
+/************************************************************
+Writes the initial part of a log record. */
+UNIV_INLINE
+byte*
+mlog_write_initial_log_record_fast(
+/*===============================*/
+ /* out: new value of log_ptr */
+ byte* ptr, /* in: pointer to (inside) a buffer frame holding the
+ file page where modification is made */
+ byte type, /* in: log item type: MLOG_1BYTE, ... */
+ byte* log_ptr,/* in: pointer to mtr log which has been opened */
+ mtr_t* mtr); /* in: mtr */
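+/************************************************************
+Illustrative sketch, not part of the original header: how a caller can
+build a log record by hand with the fast routine above. The buffer size
+passed to mlog_open (30 bytes) is only an assumed upper bound for the
+initial record plus a small record body. */
+#if 0
+static void
+mlog_fast_write_example(byte* ptr, byte type, mtr_t* mtr)
+{
+	byte*	log_ptr;
+
+	log_ptr = mlog_open(mtr, 30);
+
+	if (log_ptr == NULL) {
+		/* logging mode is MTR_LOG_NONE */
+
+		return;
+	}
+
+	log_ptr = mlog_write_initial_log_record_fast(ptr, type, log_ptr,
+									mtr);
+	/* ... append the record body, specific to each record type ... */
+
+	mlog_close(mtr, log_ptr);
+}
+#endif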
+/****************************************************************
+Writes the contents of a mini-transaction log, if any, to the database log. */
+
+dulint
+mlog_write(
+/*=======*/
+ dyn_array_t* mlog, /* in: mlog */
+ ibool* modifications); /* out: TRUE if there were
+ log items to write */
+/************************************************************
+Parses an initial log record written by mlog_write_initial_log_record. */
+
+byte*
+mlog_parse_initial_log_record(
+/*==========================*/
+ /* out: parsed record end, NULL if not a complete
+ record */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ byte* type, /* out: log record type: MLOG_1BYTE, ... */
+ ulint* space, /* out: space id */
+ ulint* page_no);/* out: page number */
+/************************************************************
+Parses a log record written by mlog_write_ulint or mlog_write_dulint. */
+
+byte*
+mlog_parse_nbytes(
+/*==============*/
+ /* out: parsed record end, NULL if not a complete
+ record */
+ ulint type, /* in: log record type: MLOG_1BYTE, ... */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ byte* page); /* in: page where to apply the log record, or NULL */
+/************************************************************
+Parses a log record written by mlog_write_string. */
+
+byte*
+mlog_parse_string(
+/*==============*/
+ /* out: parsed record end, NULL if not a complete
+ record */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ byte* page); /* in: page where to apply the log record, or NULL */
+
+
+/* Insert, update, and maybe other functions may use this value to define an
+extra mlog buffer size for variable size data */
+#define MLOG_BUF_MARGIN 256
+
+#ifndef UNIV_NONINL
+#include "mtr0log.ic"
+#endif
+
+#endif
diff --git a/innobase/include/mtr0log.ic b/innobase/include/mtr0log.ic
new file mode 100644
index 00000000000..c2150660794
--- /dev/null
+++ b/innobase/include/mtr0log.ic
@@ -0,0 +1,187 @@
+/******************************************************
+Mini-transaction logging routines
+
+(c) 1995 Innobase Oy
+
+Created 12/7/1995 Heikki Tuuri
+*******************************************************/
+
+#include "mach0data.h"
+#include "ut0lst.h"
+#include "buf0buf.h"
+
+/************************************************************
+Opens a buffer to mlog. It must be closed with mlog_close. */
+UNIV_INLINE
+byte*
+mlog_open(
+/*======*/
+ /* out: buffer, NULL if log mode MTR_LOG_NONE */
+ mtr_t* mtr, /* in: mtr */
+ ulint size) /* in: buffer size in bytes */
+{
+ dyn_array_t* mlog;
+
+ mtr->modifications = TRUE;
+
+ if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) {
+
+ return(NULL);
+ }
+
+ mlog = &(mtr->log);
+
+ return(dyn_array_open(mlog, size));
+}
+
+/************************************************************
+Closes a buffer opened to mlog. */
+UNIV_INLINE
+void
+mlog_close(
+/*=======*/
+ mtr_t* mtr, /* in: mtr */
+ byte* ptr) /* in: buffer space from ptr up was not used */
+{
+ dyn_array_t* mlog;
+
+ ut_ad(mtr_get_log_mode(mtr) != MTR_LOG_NONE);
+
+ mlog = &(mtr->log);
+
+ dyn_array_close(mlog, ptr);
+}
+
+/************************************************************
+Catenates 1 - 4 bytes to the mtr log. The value is not compressed. */
+UNIV_INLINE
+void
+mlog_catenate_ulint(
+/*================*/
+ mtr_t* mtr, /* in: mtr */
+ ulint val, /* in: value to write */
+ ulint type) /* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+{
+ dyn_array_t* mlog;
+ byte* ptr;
+
+ if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) {
+
+ return;
+ }
+
+ mlog = &(mtr->log);
+
+ ut_ad(MLOG_1BYTE == 1);
+ ut_ad(MLOG_2BYTES == 2);
+ ut_ad(MLOG_4BYTES == 4);
+
+ ptr = dyn_array_push(mlog, type);
+
+ if (type == MLOG_4BYTES) {
+ mach_write_to_4(ptr, val);
+ } else if (type == MLOG_2BYTES) {
+ mach_write_to_2(ptr, val);
+ } else {
+ ut_ad(type == MLOG_1BYTE);
+ mach_write_to_1(ptr, val);
+ }
+}
+
+/************************************************************
+Catenates a compressed ulint to mlog. */
+UNIV_INLINE
+void
+mlog_catenate_ulint_compressed(
+/*===========================*/
+ mtr_t* mtr, /* in: mtr */
+ ulint val) /* in: value to write */
+{
+ byte* log_ptr;
+
+ log_ptr = mlog_open(mtr, 10);
+
+ /* If no logging is requested, we may return now */
+ if (log_ptr == NULL) {
+
+ return;
+ }
+
+ log_ptr += mach_write_compressed(log_ptr, val);
+
+ mlog_close(mtr, log_ptr);
+}
+
+/************************************************************
+Catenates a compressed dulint to mlog. */
+UNIV_INLINE
+void
+mlog_catenate_dulint_compressed(
+/*============================*/
+ mtr_t* mtr, /* in: mtr */
+ dulint val) /* in: value to write */
+{
+ byte* log_ptr;
+
+ log_ptr = mlog_open(mtr, 15);
+
+ /* If no logging is requested, we may return now */
+ if (log_ptr == NULL) {
+
+ return;
+ }
+
+ log_ptr += mach_dulint_write_compressed(log_ptr, val);
+
+ mlog_close(mtr, log_ptr);
+}
+
+/************************************************************
+Writes the initial part of a log record. */
+UNIV_INLINE
+byte*
+mlog_write_initial_log_record_fast(
+/*===============================*/
+ /* out: new value of log_ptr */
+ byte* ptr, /* in: pointer to (inside) a buffer frame holding the
+ file page where modification is made */
+ byte type, /* in: log item type: MLOG_1BYTE, ... */
+ byte* log_ptr,/* in: pointer to mtr log which has been opened */
+ mtr_t* mtr) /* in: mtr */
+{
+ buf_block_t* block;
+ ulint space;
+ ulint offset;
+
+ ut_ad(mtr_memo_contains(mtr, buf_block_align(ptr),
+ MTR_MEMO_PAGE_X_FIX));
+ ut_ad(type <= MLOG_BIGGEST_TYPE);
+ ut_ad(ptr && log_ptr);
+
+ block = buf_block_align(ptr);
+
+ space = buf_block_get_space(block);
+ offset = buf_block_get_page_no(block);
+
+ mach_write_to_1(log_ptr, type);
+ log_ptr++;
+ log_ptr += mach_write_compressed(log_ptr, space);
+ log_ptr += mach_write_compressed(log_ptr, offset);
+
+ mtr->n_log_recs++;
+
+#ifdef UNIV_LOG_DEBUG
+/* printf("Adding to mtr log record type %lu space %lu page no %lu\n",
+ type, space, offset); */
+#endif
+
+#ifdef UNIV_DEBUG
+ /* We now assume that all x-latched pages have been modified! */
+
+ if (!mtr_memo_contains(mtr, block, MTR_MEMO_MODIFY)) {
+
+ mtr_memo_push(mtr, block, MTR_MEMO_MODIFY);
+ }
+#endif
+ return(log_ptr);
+}
diff --git a/innobase/include/mtr0mtr.h b/innobase/include/mtr0mtr.h
new file mode 100644
index 00000000000..9f9401cd1a5
--- /dev/null
+++ b/innobase/include/mtr0mtr.h
@@ -0,0 +1,343 @@
+/******************************************************
+Mini-transaction buffer
+
+(c) 1995 Innobase Oy
+
+Created 11/26/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef mtr0mtr_h
+#define mtr0mtr_h
+
+#include "univ.i"
+#include "mem0mem.h"
+#include "dyn0dyn.h"
+#include "buf0types.h"
+#include "sync0rw.h"
+#include "ut0byte.h"
+#include "mtr0types.h"
+#include "page0types.h"
+
+/* Logging modes for a mini-transaction */
+#define MTR_LOG_ALL 21 /* default mode: log all operations
+ modifying disk-based data */
+#define MTR_LOG_NONE 22 /* log no operations */
+/*#define MTR_LOG_SPACE 23 */ /* log only operations modifying
+ file space page allocation data
+ (operations in fsp0fsp.* ) */
+#define MTR_LOG_SHORT_INSERTS 24 /* inserts are logged in a shorter
+ form */
+
+/* Types for the mlock objects to store in the mtr memo; NOTE that the
+first 3 values must be RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
+#define MTR_MEMO_PAGE_S_FIX RW_S_LATCH
+#define MTR_MEMO_PAGE_X_FIX RW_X_LATCH
+#define MTR_MEMO_BUF_FIX RW_NO_LATCH
+#define MTR_MEMO_MODIFY 54
+#define MTR_MEMO_S_LOCK 55
+#define MTR_MEMO_X_LOCK 56
+
+/* Log item types: we have made them to be of the type 'byte'
+for the compiler to warn if val and type parameters are switched
+in a call to mlog_write_ulint. NOTE! For 1 - 8 bytes, the
+flag value must give the length also! */
+#define MLOG_SINGLE_REC_FLAG 128 /* if the mtr contains only
+ one log record for one page,
+ i.e., write_initial_log_record
+ has been called only once,
+ this flag is ORed to the type
+ of that first log record */
+#define MLOG_1BYTE ((byte)1) /* one byte is written */
+#define MLOG_2BYTES ((byte)2) /* 2 bytes ... */
+#define MLOG_4BYTES ((byte)4) /* 4 bytes ... */
+#define MLOG_8BYTES ((byte)8) /* 8 bytes ... */
+#define MLOG_REC_INSERT ((byte)9) /* record insert */
+#define MLOG_REC_CLUST_DELETE_MARK ((byte)10) /* mark clustered index record
+ deleted */
+#define MLOG_REC_SEC_DELETE_MARK ((byte)11) /* mark secondary index record
+ deleted */
+#define MLOG_REC_UPDATE_IN_PLACE ((byte)13) /* update of a record,
+ preserves record field sizes */
+#define MLOG_REC_DELETE ((byte)14) /* delete a record from a
+ page */
+#define MLOG_LIST_END_DELETE ((byte)15) /* delete record list end on
+ index page */
+#define MLOG_LIST_START_DELETE ((byte)16) /* delete record list start on
+ index page */
+#define MLOG_LIST_END_COPY_CREATED ((byte)17) /* copy record list end to a
+ new created index page */
+#define MLOG_PAGE_REORGANIZE ((byte)18) /* reorganize an index page */
+#define MLOG_PAGE_CREATE ((byte)19) /* create an index page */
+#define MLOG_UNDO_INSERT ((byte)20) /* insert entry in an undo
+ log */
+#define MLOG_UNDO_ERASE_END ((byte)21) /* erase an undo log page end */
+#define MLOG_UNDO_INIT ((byte)22) /* initialize a page in an
+ undo log */
+#define MLOG_UNDO_HDR_DISCARD ((byte)23) /* discard an update undo log
+ header */
+#define MLOG_UNDO_HDR_REUSE ((byte)24) /* reuse an insert undo log
+ header */
+#define MLOG_UNDO_HDR_CREATE ((byte)25) /* create an undo log header */
+#define MLOG_REC_MIN_MARK ((byte)26) /* mark an index record as the
+ predefined minimum record */
+#define MLOG_IBUF_BITMAP_INIT ((byte)27) /* initialize an ibuf bitmap
+ page */
+#define MLOG_FULL_PAGE ((byte)28) /* full contents of a page */
+#define MLOG_INIT_FILE_PAGE ((byte)29) /* this means that a file page
+ is taken into use and the prior
+ contents of the page should be
+ ignored: in recovery we must
+ not trust the lsn values stored
+ to the file page */
+#define MLOG_WRITE_STRING ((byte)30) /* write a string to a page */
+#define MLOG_MULTI_REC_END ((byte)31) /* if a single mtr writes
+ log records for several pages,
+ this log record ends the
+ sequence of these records */
+#define MLOG_DUMMY_RECORD ((byte)32) /* dummy log record used to
+ pad a log block full */
+#define MLOG_BIGGEST_TYPE ((byte)32) /* biggest value (used in
+ asserts) */
+
+/*******************************************************************
+Starts a mini-transaction and creates a mini-transaction handle
+and buffer in the memory buffer given by the caller. */
+UNIV_INLINE
+mtr_t*
+mtr_start(
+/*======*/
+ /* out: mtr buffer which also acts as
+ the mtr handle */
+ mtr_t* mtr); /* in: memory buffer for the mtr buffer */
+/*******************************************************************
+Starts a mini-transaction and creates a mini-transaction handle
+and buffer in the memory buffer given by the caller. */
+
+mtr_t*
+mtr_start_noninline(
+/*================*/
+ /* out: mtr buffer which also acts as
+ the mtr handle */
+ mtr_t* mtr); /* in: memory buffer for the mtr buffer */
+/*******************************************************************
+Commits a mini-transaction. */
+
+void
+mtr_commit(
+/*=======*/
+ mtr_t* mtr); /* in: mini-transaction */
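+/*******************************************************************
+Illustrative sketch, not part of the original header: the basic life
+cycle of a mini-transaction. The mtr struct normally lives on the
+caller's stack. */
+#if 0
+static void
+mtr_usage_example(void)
+{
+	mtr_t	mtr;
+
+	mtr_start(&mtr);
+
+	/* ... latch buffer pool pages and modify them, writing the
+	corresponding log records with the mlog_* routines ... */
+
+	mtr_commit(&mtr);	/* writes the log and releases the latches
+				registered in the memo */
+}
+#endif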
+/****************************************************************
+Writes to the database log the full contents of the pages that this mtr is
+the first to modify in the buffer pool. This function is called when the
+database is in the online backup state. */
+
+void
+mtr_log_write_backup_entries(
+/*=========================*/
+ mtr_t* mtr, /* in: mini-transaction */
+ dulint backup_lsn); /* in: online backup lsn */
+/**************************************************************
+Sets and returns a savepoint in mtr. */
+UNIV_INLINE
+ulint
+mtr_set_savepoint(
+/*==============*/
+ /* out: savepoint */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************
+Releases the latches stored in an mtr memo down to a savepoint.
+NOTE! The mtr must not have made changes to buffer pages after the
+savepoint, as these can be handled only by mtr_commit. */
+
+void
+mtr_rollback_to_savepoint(
+/*======================*/
+ mtr_t* mtr, /* in: mtr */
+ ulint savepoint); /* in: savepoint */
+/**************************************************************
+Releases the (index tree) s-latch stored in an mtr memo after a
+savepoint. */
+UNIV_INLINE
+void
+mtr_release_s_latch_at_savepoint(
+/*=============================*/
+ mtr_t* mtr, /* in: mtr */
+ ulint savepoint, /* in: savepoint */
+ rw_lock_t* lock); /* in: latch to release */
+/*******************************************************************
+Gets the logging mode of a mini-transaction. */
+UNIV_INLINE
+ulint
+mtr_get_log_mode(
+/*=============*/
+ /* out: logging mode: MTR_LOG_NONE, ... */
+ mtr_t* mtr); /* in: mtr */
+/*******************************************************************
+Changes the logging mode of a mini-transaction. */
+UNIV_INLINE
+ulint
+mtr_set_log_mode(
+/*=============*/
+ /* out: old mode */
+ mtr_t* mtr, /* in: mtr */
+ ulint mode); /* in: logging mode: MTR_LOG_NONE, ... */
+/************************************************************
+Reads 1 - 4 bytes from a file page buffered in the buffer pool. */
+
+ulint
+mtr_read_ulint(
+/*===========*/
+ /* out: value read */
+ byte* ptr, /* in: pointer from where to read */
+ ulint type, /* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/************************************************************
+Reads 8 bytes from a file page buffered in the buffer pool. */
+
+dulint
+mtr_read_dulint(
+/*===========*/
+ /* out: value read */
+ byte* ptr, /* in: pointer from where to read */
+ ulint type, /* in: MLOG_8BYTES */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/*************************************************************************
+This macro locks an rw-lock in s-mode. */
+#ifdef UNIV_SYNC_DEBUG
+#define mtr_s_lock(B, MTR) mtr_s_lock_func((B), __FILE__, __LINE__,\
+ (MTR))
+#else
+#define mtr_s_lock(B, MTR) mtr_s_lock_func((B), (MTR))
+#endif
+/*************************************************************************
+This macro locks an rw-lock in x-mode. */
+#ifdef UNIV_SYNC_DEBUG
+#define mtr_x_lock(B, MTR) mtr_x_lock_func((B), __FILE__, __LINE__,\
+ (MTR))
+#else
+#define mtr_x_lock(B, MTR) mtr_x_lock_func((B), (MTR))
+#endif
+/*************************************************************************
+NOTE! Use the macro above!
+Locks a lock in s-mode. */
+UNIV_INLINE
+void
+mtr_s_lock_func(
+/*============*/
+ rw_lock_t* lock, /* in: rw-lock */
+#ifdef UNIV_SYNC_DEBUG
+ char* file, /* in: file name */
+ ulint line, /* in: line number */
+#endif
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************************
+NOTE! Use the macro above!
+Locks a lock in x-mode. */
+UNIV_INLINE
+void
+mtr_x_lock_func(
+/*============*/
+ rw_lock_t* lock, /* in: rw-lock */
+#ifdef UNIV_SYNC_DEBUG
+ char* file, /* in: file name */
+ ulint line, /* in: line number */
+#endif
+ mtr_t* mtr); /* in: mtr */
+
+/*******************************************************
+Releases an object in the memo stack. */
+
+void
+mtr_memo_release(
+/*=============*/
+ mtr_t* mtr, /* in: mtr */
+ void* object, /* in: object */
+ ulint type); /* in: object type: MTR_MEMO_S_LOCK, ... */
+/****************************************************************
+Parses a log record which contains the full contents of a page. */
+
+byte*
+mtr_log_parse_full_page(
+/*====================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page); /* in: page or NULL */
+/**************************************************************
+Checks if memo contains the given item. */
+UNIV_INLINE
+ibool
+mtr_memo_contains(
+/*==============*/
+ /* out: TRUE if contains */
+ mtr_t* mtr, /* in: mtr */
+ void* object, /* in: object to search */
+ ulint type); /* in: type of object */
+/*************************************************************
+Prints info of an mtr handle. */
+
+void
+mtr_print(
+/*======*/
+ mtr_t* mtr); /* in: mtr */
+/*######################################################################*/
+
+#define MTR_BUF_MEMO_SIZE 200 /* number of slots in memo */
+
+/*******************************************************************
+Returns the log object of a mini-transaction buffer. */
+UNIV_INLINE
+dyn_array_t*
+mtr_get_log(
+/*========*/
+ /* out: log */
+ mtr_t* mtr); /* in: mini-transaction */
+/*******************************************************
+Pushes an object to an mtr memo stack. */
+UNIV_INLINE
+void
+mtr_memo_push(
+/*==========*/
+ mtr_t* mtr, /* in: mtr */
+ void* object, /* in: object */
+ ulint type); /* in: object type: MTR_MEMO_S_LOCK, ... */
+
+
+/* Type definition of a mini-transaction memo stack slot. */
+typedef struct mtr_memo_slot_struct mtr_memo_slot_t;
+struct mtr_memo_slot_struct{
+ ulint type; /* type of the stored object (MTR_MEMO_S_LOCK, ...) */
+ void* object; /* pointer to the object */
+};
+
+/* Mini-transaction handle and buffer */
+struct mtr_struct{
+ ulint state; /* MTR_ACTIVE, MTR_COMMITTING, MTR_COMMITTED */
+ dyn_array_t memo; /* memo stack for locks etc. */
+ dyn_array_t log; /* mini-transaction log */
+ ibool modifications;
+ /* TRUE if the mtr made modifications to
+ buffer pool pages */
+ ulint n_log_recs;
+ /* count of how many page initial log records
+ have been written to the mtr log */
+ ulint log_mode; /* specifies which operations should be
+ logged; default value MTR_LOG_ALL */
+ dulint start_lsn;/* start lsn of the possible log entry for
+ this mtr */
+ dulint end_lsn;/* end lsn of the possible log entry for
+ this mtr */
+ ulint magic_n;
+};
+
+#define MTR_MAGIC_N 54551
+
+#define MTR_ACTIVE 12231
+#define MTR_COMMITTING 56456
+#define MTR_COMMITTED 34676
+
+#ifndef UNIV_NONINL
+#include "mtr0mtr.ic"
+#endif
+
+#endif
diff --git a/innobase/include/mtr0mtr.ic b/innobase/include/mtr0mtr.ic
new file mode 100644
index 00000000000..5718d872bcb
--- /dev/null
+++ b/innobase/include/mtr0mtr.ic
@@ -0,0 +1,261 @@
+/******************************************************
+Mini-transaction buffer
+
+(c) 1995 Innobase Oy
+
+Created 11/26/1995 Heikki Tuuri
+*******************************************************/
+
+#include "sync0sync.h"
+#include "sync0rw.h"
+#include "mach0data.h"
+
+/*******************************************************************
+Starts a mini-transaction and creates a mini-transaction handle
+and a buffer in the memory buffer given by the caller. */
+UNIV_INLINE
+mtr_t*
+mtr_start(
+/*======*/
+ /* out: mtr buffer which also acts as
+ the mtr handle */
+ mtr_t* mtr) /* in: memory buffer for the mtr buffer */
+{
+ dyn_array_create(&(mtr->memo));
+ dyn_array_create(&(mtr->log));
+
+ mtr->log_mode = MTR_LOG_ALL;
+ mtr->modifications = FALSE;
+ mtr->n_log_recs = 0;
+
+#ifdef UNIV_DEBUG
+ mtr->state = MTR_ACTIVE;
+ mtr->magic_n = MTR_MAGIC_N;
+#endif
+ return(mtr);
+}
+
+/*******************************************************
+Pushes an object to an mtr memo stack. */
+UNIV_INLINE
+void
+mtr_memo_push(
+/*==========*/
+ mtr_t* mtr, /* in: mtr */
+ void* object, /* in: object */
+ ulint type) /* in: object type: MTR_MEMO_S_LOCK, ... */
+{
+ dyn_array_t* memo;
+ mtr_memo_slot_t* slot;
+
+ ut_ad(object);
+ ut_ad(type >= MTR_MEMO_PAGE_S_FIX);
+ ut_ad(type <= MTR_MEMO_X_LOCK);
+ ut_ad(mtr);
+ ut_ad(mtr->magic_n == MTR_MAGIC_N);
+
+ memo = &(mtr->memo);
+
+ slot = dyn_array_push(memo, sizeof(mtr_memo_slot_t));
+
+ slot->object = object;
+ slot->type = type;
+}
+
+/**************************************************************
+Sets and returns a savepoint in mtr. */
+UNIV_INLINE
+ulint
+mtr_set_savepoint(
+/*==============*/
+ /* out: savepoint */
+ mtr_t* mtr) /* in: mtr */
+{
+ dyn_array_t* memo;
+
+ ut_ad(mtr);
+ ut_ad(mtr->magic_n == MTR_MAGIC_N);
+
+ memo = &(mtr->memo);
+
+ return(dyn_array_get_data_size(memo));
+}
+
+/**************************************************************
+Releases the (index tree) s-latch stored in an mtr memo after a
+savepoint. */
+UNIV_INLINE
+void
+mtr_release_s_latch_at_savepoint(
+/*=============================*/
+ mtr_t* mtr, /* in: mtr */
+ ulint savepoint, /* in: savepoint */
+ rw_lock_t* lock) /* in: latch to release */
+{
+ mtr_memo_slot_t* slot;
+ dyn_array_t* memo;
+
+ ut_ad(mtr);
+ ut_ad(mtr->magic_n == MTR_MAGIC_N);
+ ut_ad(mtr->state == MTR_ACTIVE);
+
+ memo = &(mtr->memo);
+
+ ut_ad(dyn_array_get_data_size(memo) > savepoint);
+
+ slot = dyn_array_get_element(memo, savepoint);
+
+ ut_ad(slot->object == lock);
+ ut_ad(slot->type == MTR_MEMO_S_LOCK);
+
+ rw_lock_s_unlock(lock);
+
+ slot->object = NULL;
+}
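+
+/**************************************************************
+Illustrative sketch, not part of the original file: a savepoint taken
+just before an s-latch is acquired can later be used to release exactly
+that latch; the rw-lock here is only a placeholder. */
+#if 0
+static void
+mtr_savepoint_example(mtr_t* mtr, rw_lock_t* lock)
+{
+	ulint	savepoint;
+
+	savepoint = mtr_set_savepoint(mtr);
+
+	mtr_s_lock(lock, mtr);	/* the latch slot is pushed at the
+				savepoint offset */
+
+	/* ... use the protected structure ... */
+
+	mtr_release_s_latch_at_savepoint(mtr, savepoint, lock);
+}
+#endif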
+
+/**************************************************************
+Checks if memo contains the given item. */
+UNIV_INLINE
+ibool
+mtr_memo_contains(
+/*==============*/
+ /* out: TRUE if contains */
+ mtr_t* mtr, /* in: mtr */
+ void* object, /* in: object to search */
+ ulint type) /* in: type of object */
+{
+ mtr_memo_slot_t* slot;
+ dyn_array_t* memo;
+ ulint offset;
+
+ ut_ad(mtr);
+ ut_ad(mtr->magic_n == MTR_MAGIC_N);
+
+ memo = &(mtr->memo);
+
+ offset = dyn_array_get_data_size(memo);
+
+ while (offset > 0) {
+ offset -= sizeof(mtr_memo_slot_t);
+
+ slot = dyn_array_get_element(memo, offset);
+
+ if ((object == slot->object) && (type == slot->type)) {
+
+ return(TRUE);
+ }
+ }
+
+ return(FALSE);
+}
+
+/*******************************************************************
+Returns the log object of a mini-transaction buffer. */
+UNIV_INLINE
+dyn_array_t*
+mtr_get_log(
+/*========*/
+ /* out: log */
+ mtr_t* mtr) /* in: mini-transaction */
+{
+ ut_ad(mtr);
+ ut_ad(mtr->magic_n == MTR_MAGIC_N);
+
+ return(&(mtr->log));
+}
+
+/*******************************************************************
+Gets the logging mode of a mini-transaction. */
+UNIV_INLINE
+ulint
+mtr_get_log_mode(
+/*=============*/
+ /* out: logging mode: MTR_LOG_NONE, ... */
+ mtr_t* mtr) /* in: mtr */
+{
+ ut_ad(mtr);
+ ut_ad(mtr->log_mode >= MTR_LOG_ALL);
+ ut_ad(mtr->log_mode <= MTR_LOG_SHORT_INSERTS);
+
+ return(mtr->log_mode);
+}
+
+/*******************************************************************
+Changes the logging mode of a mini-transaction. */
+UNIV_INLINE
+ulint
+mtr_set_log_mode(
+/*=============*/
+ /* out: old mode */
+ mtr_t* mtr, /* in: mtr */
+ ulint mode) /* in: logging mode: MTR_LOG_NONE, ... */
+{
+ ulint old_mode;
+
+ ut_ad(mtr);
+ ut_ad(mode >= MTR_LOG_ALL);
+ ut_ad(mode <= MTR_LOG_SHORT_INSERTS);
+
+ old_mode = mtr->log_mode;
+
+ if ((mode == MTR_LOG_SHORT_INSERTS) && (old_mode == MTR_LOG_NONE)) {
+ /* Do nothing */
+ } else {
+ mtr->log_mode = mode;
+ }
+
+ ut_ad(old_mode >= MTR_LOG_ALL);
+ ut_ad(old_mode <= MTR_LOG_SHORT_INSERTS);
+
+ return(old_mode);
+}
+
+/*************************************************************************
+Locks a lock in s-mode. */
+UNIV_INLINE
+void
+mtr_s_lock_func(
+/*============*/
+ rw_lock_t* lock, /* in: rw-lock */
+#ifdef UNIV_SYNC_DEBUG
+ char* file, /* in: file name */
+ ulint line, /* in: line number */
+#endif
+ mtr_t* mtr) /* in: mtr */
+{
+ ut_ad(mtr);
+ ut_ad(lock);
+
+ rw_lock_s_lock_func(lock
+ #ifdef UNIV_SYNC_DEBUG
+ ,0, file, line
+ #endif
+ );
+
+ mtr_memo_push(mtr, lock, MTR_MEMO_S_LOCK);
+}
+
+/*************************************************************************
+Locks a lock in x-mode. */
+UNIV_INLINE
+void
+mtr_x_lock_func(
+/*============*/
+ rw_lock_t* lock, /* in: rw-lock */
+#ifdef UNIV_SYNC_DEBUG
+ char* file, /* in: file name */
+ ulint line, /* in: line number */
+#endif
+ mtr_t* mtr) /* in: mtr */
+{
+ ut_ad(mtr);
+ ut_ad(lock);
+
+ rw_lock_x_lock_func(lock, 0
+ #ifdef UNIV_SYNC_DEBUG
+ , file, line
+ #endif
+ );
+
+ mtr_memo_push(mtr, lock, MTR_MEMO_X_LOCK);
+}
diff --git a/innobase/include/mtr0types.h b/innobase/include/mtr0types.h
new file mode 100644
index 00000000000..e3b6ec9a84f
--- /dev/null
+++ b/innobase/include/mtr0types.h
@@ -0,0 +1,14 @@
+/******************************************************
+Mini-transaction buffer global types
+
+(c) 1995 Innobase Oy
+
+Created 11/26/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef mtr0types_h
+#define mtr0types_h
+
+typedef struct mtr_struct mtr_t;
+
+#endif
diff --git a/innobase/include/odbc0odbc.h b/innobase/include/odbc0odbc.h
new file mode 100644
index 00000000000..7f842b54b27
--- /dev/null
+++ b/innobase/include/odbc0odbc.h
@@ -0,0 +1,20 @@
+/******************************************************
+Innobase ODBC client library additional header
+
+(c) 1998 Innobase Oy
+
+Created 2/22/1998 Heikki Tuuri
+*******************************************************/
+
+#ifndef odbc0odbc_h
+#define odbc0odbc_h
+
+#include "ib_odbc.h"
+
+/* Datagram size in communications */
+#define ODBC_DATAGRAM_SIZE 8192
+
+/* Communication address maximum length in bytes */
+#define ODBC_ADDRESS_SIZE COM_MAX_ADDR_LEN
+
+#endif
diff --git a/innobase/include/os0file.h b/innobase/include/os0file.h
new file mode 100644
index 00000000000..5b90f24f12e
--- /dev/null
+++ b/innobase/include/os0file.h
@@ -0,0 +1,353 @@
+/******************************************************
+The interface to the operating system file io
+
+(c) 1995 Innobase Oy
+
+Created 10/21/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef os0file_h
+#define os0file_h
+
+#include "univ.i"
+
+#ifdef __WIN__
+
+#include <windows.h>
+#if (defined(__NT__) || defined(__WIN2000__))
+
+#define WIN_ASYNC_IO
+
+#endif
+
+#define UNIV_NON_BUFFERED_IO
+
+#else
+
+#if defined(HAVE_AIO_H) && defined(HAVE_LIBRT)
+#define POSIX_ASYNC_IO
+#endif
+
+#endif
+
+#ifdef __WIN__
+typedef HANDLE os_file_t;
+#else
+typedef int os_file_t;
+#endif
+
+/* If this flag is TRUE, then we will use the native aio of the
+OS (provided we compiled Innobase with it in), otherwise we will
+use simulated aio we build below with threads */
+
+extern ibool os_aio_use_native_aio;
+
+#define OS_FILE_SECTOR_SIZE 512
+
+/* The next value should be smaller than or equal to the smallest sector size used
+on any disk. A log block is required to be a portion of disk which is written
+so that if the start and the end of a block get written to disk, then the
+whole block gets written. This should be true even in most cases of a crash:
+if this fails for a log block, then it is equivalent to a media failure in the
+log. */
+
+#define OS_FILE_LOG_BLOCK_SIZE 512
+
+/* Options for file_create */
+#define OS_FILE_OPEN 51
+#define OS_FILE_CREATE 52
+#define OS_FILE_OVERWRITE 53
+
+/* Options for file_create */
+#define OS_FILE_AIO 61
+#define OS_FILE_NORMAL 62
+
+/* Error codes from os_file_get_last_error */
+#define OS_FILE_NOT_FOUND 71
+#define OS_FILE_DISK_FULL 72
+#define OS_FILE_ALREADY_EXISTS 73
+#define OS_FILE_AIO_RESOURCES_RESERVED 74 /* wait for OS aio resources
+ to become available again */
+#define OS_FILE_ERROR_NOT_SPECIFIED 75
+
+/* Types for aio operations */
+#define OS_FILE_READ 10
+#define OS_FILE_WRITE 11
+
+#define OS_FILE_LOG 256 /* This can be ORed to type */
+
+#define OS_AIO_N_PENDING_IOS_PER_THREAD 32 /* Win NT does not allow more
+ than 64 */
+
+/* Modes for aio operations */
+#define OS_AIO_NORMAL 21 /* Normal asynchronous i/o not for ibuf
+ pages or ibuf bitmap pages */
+#define OS_AIO_IBUF 22 /* Asynchronous i/o for ibuf pages or ibuf
+ bitmap pages */
+#define OS_AIO_LOG 23 /* Asynchronous i/o for the log */
+#define OS_AIO_SYNC 24 /* Asynchronous i/o where the calling thread
+ will itself wait for the i/o to complete,
+ doing also the job of the i/o-handler thread;
+ can be used for any pages, ibuf or non-ibuf.
+ This is used to save CPU time, as we can do
+ with fewer thread switches. Plain synchronous
+ i/o is not as good, because it must serialize
+ the file seek and read or write, causing a
+ bottleneck for parallelism. */
+
+#define OS_AIO_SIMULATED_WAKE_LATER 512 /* This can be ORed to mode
+ in the call of os_aio(...),
+ if the caller wants to post several i/o
+ requests in a batch, and only after that
+ wake the i/o-handler thread; this has
+ effect only in simulated aio */
+
+/********************************************************************
+Opens an existing file or creates a new one. */
+
+os_file_t
+os_file_create(
+/*===========*/
+ /* out, own: handle to the file, not defined if error,
+			error number can be retrieved with os_file_get_last_error */
+ char* name, /* in: name of the file or path as a null-terminated
+ string */
+ ulint create_mode,/* in: OS_FILE_OPEN if an existing file is opened
+ (if does not exist, error), or OS_FILE_CREATE if a new
+ file is created (if exists, error), OS_FILE_OVERWRITE
+ if a new file is created or an old overwritten */
+ ulint purpose,/* in: OS_FILE_AIO, if asynchronous, non-buffered i/o
+ is desired, OS_FILE_NORMAL, if any normal file */
+ ibool* success);/* out: TRUE if succeed, FALSE if error */
+/***************************************************************************
+Closes a file handle. In case of error, error number can be retrieved with
+os_file_get_last_error. */
+
+ibool
+os_file_close(
+/*==========*/
+ /* out: TRUE if success */
+ os_file_t file); /* in, own: handle to a file */
+/***************************************************************************
+Gets a file size. */
+
+ibool
+os_file_get_size(
+/*=============*/
+ /* out: TRUE if success */
+ os_file_t file, /* in: handle to a file */
+ ulint* size, /* out: least significant 32 bits of file
+ size */
+ ulint* size_high);/* out: most significant 32 bits of size */
+/***************************************************************************
+Sets a file size. This function can be used to extend or truncate a file. */
+
+ibool
+os_file_set_size(
+/*=============*/
+ /* out: TRUE if success */
+ char* name, /* in: name of the file or path as a
+ null-terminated string */
+ os_file_t file, /* in: handle to a file */
+ ulint size, /* in: least significant 32 bits of file
+ size */
+ ulint size_high);/* in: most significant 32 bits of size */
+/***************************************************************************
+Flushes the write buffers of a given file to the disk. */
+
+ibool
+os_file_flush(
+/*==========*/
+ /* out: TRUE if success */
+	os_file_t	file);	/* in: handle to a file */
+/***************************************************************************
+Retrieves the last error number if an error occurs in a file io function.
+The number should be retrieved before any other OS calls (because they may
+overwrite the error number). If the number is not known to this program,
+the OS error number + 100 is returned. */
+
+ulint
+os_file_get_last_error(void);
+/*========================*/
+ /* out: error number, or OS error number + 100 */
+/***********************************************************************
+Requests a synchronous read operation. */
+
+ibool
+os_file_read(
+/*=========*/
+ /* out: TRUE if request was
+ successful, FALSE if fail */
+ os_file_t file, /* in: handle to a file */
+ void* buf, /* in: buffer where to read */
+ ulint offset, /* in: least significant 32 bits of file
+ offset where to read */
+ ulint offset_high,/* in: most significant 32 bits of
+ offset */
+ ulint n); /* in: number of bytes to read */
+/***********************************************************************
+Requests a synchronous write operation. */
+
+ibool
+os_file_write(
+/*==========*/
+ /* out: TRUE if request was
+ successful, FALSE if fail */
+ char* name, /* in: name of the file or path as a
+ null-terminated string */
+ os_file_t file, /* in: handle to a file */
+ void* buf, /* in: buffer from which to write */
+ ulint offset, /* in: least significant 32 bits of file
+ offset where to write */
+ ulint offset_high,/* in: most significant 32 bits of
+ offset */
+ ulint n); /* in: number of bytes to write */
+/****************************************************************************
+Initializes the asynchronous io system. Creates separate aio array for
+non-ibuf read and write, a third aio array for the ibuf i/o, with just one
+segment, two aio arrays for log reads and writes with one segment, and a
+synchronous aio array of the specified size. The combined number of segments
+in the three first aio arrays is the parameter n_segments given to the
+function. The caller must create an i/o handler thread for each segment in
+the four first arrays, but not for the sync aio array. */
+
+void
+os_aio_init(
+/*========*/
+ ulint n, /* in: maximum number of pending aio operations
+ allowed; n must be divisible by n_segments */
+ ulint n_segments, /* in: combined number of segments in the four
+ first aio arrays; must be >= 4 */
+ ulint n_slots_sync); /* in: number of slots in the sync aio array */
+/***********************************************************************
+Requests an asynchronous i/o operation. */
+
+ibool
+os_aio(
+/*===*/
+ /* out: TRUE if request was queued
+ successfully, FALSE if fail */
+ ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE */
+ ulint mode, /* in: OS_AIO_NORMAL, ..., possibly ORed
+ to OS_AIO_SIMULATED_WAKE_LATER: the
+ last flag advises this function not to wake
+ i/o-handler threads, but the caller will
+ do the waking explicitly later, in this
+ way the caller can post several requests in
+ a batch; NOTE that the batch must not be
+ so big that it exhausts the slots in aio
+ arrays! NOTE that a simulated batch
+ may introduce hidden chances of deadlocks,
+ because i/os are not actually handled until
+ all have been posted: use with great
+ caution! */
+ char* name, /* in: name of the file or path as a
+ null-terminated string */
+ os_file_t file, /* in: handle to a file */
+ void* buf, /* in: buffer where to read or from which
+ to write */
+ ulint offset, /* in: least significant 32 bits of file
+ offset where to read or write */
+ ulint offset_high, /* in: most significant 32 bits of
+ offset */
+ ulint n, /* in: number of bytes to read or write */
+ void* message1,/* in: messages for the aio handler (these
+ can be used to identify a completed aio
+ operation); if mode is OS_AIO_SYNC, these
+ are ignored */
+ void* message2);
+/**************************************************************************
+Wakes up simulated aio i/o-handler threads if they have something to do. */
+
+void
+os_aio_simulated_wake_handler_threads(void);
+/*=======================================*/
+
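A minimal sketch (not part of this header) of the batching pattern described above: post several simulated aio requests with OS_AIO_SIMULATED_WAKE_LATER ORed into the mode, and wake the i/o-handler threads only once afterwards. The file handle, buffers, sizes and the helper name are illustrative assumptions; real callers would also pass non-NULL messages so that completed operations can be identified.

static void
os_aio_batch_sketch(os_file_t file, byte* buf1, byte* buf2)
{
	os_aio(OS_FILE_READ, OS_AIO_NORMAL | OS_AIO_SIMULATED_WAKE_LATER,
		(char*)"ib_sketch_file", file, buf1, 0, 0, 8192, NULL, NULL);

	os_aio(OS_FILE_READ, OS_AIO_NORMAL | OS_AIO_SIMULATED_WAKE_LATER,
		(char*)"ib_sketch_file", file, buf2, 8192, 0, 8192, NULL, NULL);

	/* wake the handler threads only after the whole batch is posted */
	os_aio_simulated_wake_handler_threads();
}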
+#ifdef WIN_ASYNC_IO
+/**************************************************************************
+This function is only used in Windows asynchronous i/o.
+Waits for an aio operation to complete. This function is used to wait
+for completed requests. The aio array of pending requests is divided
+into segments. The thread specifies which segment or slot it wants to wait
+for. NOTE: this function will also take care of freeing the aio slot,
+therefore no other thread is allowed to do the freeing! */
+
+ibool
+os_aio_windows_handle(
+/*==================*/
+ /* out: TRUE if the aio operation succeeded */
+ ulint segment, /* in: the number of the segment in the aio
+ arrays to wait for; segment 0 is the ibuf
+ i/o thread, segment 1 the log i/o thread,
+ then follow the non-ibuf read threads, and as
+ the last are the non-ibuf write threads; if
+ this is ULINT_UNDEFINED, then it means that
+ sync aio is used, and this parameter is
+ ignored */
+ ulint pos, /* this parameter is used only in sync aio:
+ wait for the aio slot at this position */
+ void** message1, /* out: the messages passed with the aio
+ request; note that also in the case where
+ the aio operation failed, these output
+ parameters are valid and can be used to
+ restart the operation, for example */
+ void** message2);
+#endif
+#ifdef POSIX_ASYNC_IO
+/**************************************************************************
+This function is only used in Posix asynchronous i/o. Waits for an aio
+operation to complete. */
+
+ibool
+os_aio_posix_handle(
+/*================*/
+ /* out: TRUE if the aio operation succeeded */
+ ulint array_no, /* in: array number 0 - 3 */
+ void** message1, /* out: the messages passed with the aio
+ request; note that also in the case where
+ the aio operation failed, these output
+ parameters are valid and can be used to
+ restart the operation, for example */
+ void** message2);
+#endif
+/**************************************************************************
+Does simulated aio. This function should be called by an i/o-handler
+thread. */
+
+ibool
+os_aio_simulated_handle(
+/*====================*/
+ /* out: TRUE if the aio operation succeeded */
+ ulint segment, /* in: the number of the segment in the aio
+ arrays to wait for; segment 0 is the ibuf
+ i/o thread, segment 1 the log i/o thread,
+ then follow the non-ibuf read threads, and as
+ the last are the non-ibuf write threads */
+ void** message1, /* out: the messages passed with the aio
+ request; note that also in the case where
+ the aio operation failed, these output
+ parameters are valid and can be used to
+ restart the operation, for example */
+ void** message2);
+/**************************************************************************
+Validates the consistency of the aio system. */
+
+ibool
+os_aio_validate(void);
+/*=================*/
+ /* out: TRUE if ok */
+/**************************************************************************
+Prints info of the aio arrays. */
+
+void
+os_aio_print(void);
+/*==============*/
+/**************************************************************************
+Checks that all slots in the system have been freed, that is, there are
+no pending io operations. */
+
+ibool
+os_aio_all_slots_free(void);
+/*=======================*/
+ /* out: TRUE if all free */
+#endif
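A minimal sketch (not part of the header above) of how the synchronous file primitives might be strung together; the file name, buffer contents and helper name are illustrative assumptions.

static void
os_file_usage_sketch(void)
{
	ibool		success;
	os_file_t	file;
	byte		buf[OS_FILE_SECTOR_SIZE];
	ulint		i;

	for (i = 0; i < OS_FILE_SECTOR_SIZE; i++) {
		buf[i] = 0;			/* one zero-filled sector */
	}

	file = os_file_create((char*)"ib_sketch_file", OS_FILE_CREATE,
				OS_FILE_NORMAL, &success);
	if (!success) {
		/* e.g. OS_FILE_ALREADY_EXISTS or OS_FILE_DISK_FULL */
		(void) os_file_get_last_error();

		return;
	}

	if (os_file_write((char*)"ib_sketch_file", file, buf, 0, 0,
				OS_FILE_SECTOR_SIZE)) {
		os_file_flush(file);	/* force the write to the disk */
	}

	os_file_close(file);
}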
diff --git a/innobase/include/os0proc.h b/innobase/include/os0proc.h
new file mode 100644
index 00000000000..9da1f33e070
--- /dev/null
+++ b/innobase/include/os0proc.h
@@ -0,0 +1,71 @@
+/******************************************************
+The interface to the operating system
+process control primitives
+
+(c) 1995 Innobase Oy
+
+Created 9/30/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef os0proc_h
+#define os0proc_h
+
+#include "univ.i"
+
+typedef void* os_process_t;
+typedef unsigned long int os_process_id_t;
+
+/********************************************************************
+Allocates non-cacheable memory. */
+
+void*
+os_mem_alloc_nocache(
+/*=================*/
+ /* out: allocated memory */
+ ulint n); /* in: number of bytes */
+#ifdef notdefined
+/********************************************************************
+Creates a new process. */
+
+ibool
+os_process_create(
+/*==============*/
+ char* name, /* in: name of the executable to start
+ or its full path name */
+ char* cmd, /* in: command line for the starting
+ process, or NULL if no command line
+ specified */
+ os_process_t* proc, /* out: handle to the process */
+ os_process_id_t* id); /* out: process id */
+/**************************************************************************
+Exits a process. */
+
+void
+os_process_exit(
+/*============*/
+ ulint code); /* in: exit code */
+/**************************************************************************
+Gets process exit code. */
+
+ibool
+os_process_get_exit_code(
+/*=====================*/
+ /* out: TRUE if succeed, FALSE if fail */
+ os_process_t proc, /* in: handle to the process */
+ ulint* code); /* out: exit code */
+#endif
+/********************************************************************
+Sets the priority boost for threads released from waiting within the current
+process. */
+
+void
+os_process_set_priority_boost(
+/*==========================*/
+ ibool do_boost); /* in: TRUE if priority boost should be done,
+ FALSE if not */
+
+#ifndef UNIV_NONINL
+#include "os0proc.ic"
+#endif
+
+#endif
diff --git a/innobase/include/os0proc.ic b/innobase/include/os0proc.ic
new file mode 100644
index 00000000000..651ba1f17e3
--- /dev/null
+++ b/innobase/include/os0proc.ic
@@ -0,0 +1,10 @@
+/******************************************************
+The interface to the operating system
+process control primitives
+
+(c) 1995 Innobase Oy
+
+Created 9/30/1995 Heikki Tuuri
+*******************************************************/
+
+
diff --git a/innobase/include/os0shm.h b/innobase/include/os0shm.h
new file mode 100644
index 00000000000..250794a976f
--- /dev/null
+++ b/innobase/include/os0shm.h
@@ -0,0 +1,66 @@
+/******************************************************
+The interface to the operating system
+shared memory primitives
+
+(c) 1995 Innobase Oy
+
+Created 9/23/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef os0shm_h
+#define os0shm_h
+
+#include "univ.i"
+
+typedef void* os_shm_t;
+
+
+/********************************************************************
+Creates an area of shared memory. It can be named so that
+different processes may access it in the same computer.
+If an area with the same name already exists, returns
+a handle to that area (where the size of the area is
+not changed even if this call requests a different size).
+To use the area, it first has to be mapped to the process
+address space by os_shm_map. */
+
+os_shm_t
+os_shm_create(
+/*==========*/
+ /* out, own: handle to the shared
+ memory area, NULL if error */
+ ulint size, /* in: area size < 4 GB */
+ char* name); /* in: name of the area as a null-terminated
+ string */
+/***************************************************************************
+Frees a shared memory area. The area can be freed only after it
+has been unmapped in all the processes where it was mapped. */
+
+ibool
+os_shm_free(
+/*========*/
+ /* out: TRUE if success */
+ os_shm_t shm); /* in, own: handle to a shared memory area */
+/***************************************************************************
+Maps a shared memory area in the address space of a process. */
+
+void*
+os_shm_map(
+/*=======*/
+ /* out: address of the area, NULL if error */
+ os_shm_t shm); /* in: handle to a shared memory area */
+/***************************************************************************
+Unmaps a shared memory area from the address space of a process. */
+
+ibool
+os_shm_unmap(
+/*=========*/
+ /* out: TRUE if succeed */
+ void* addr); /* in: address of the area */
+
+
+#ifndef UNIV_NONINL
+#include "os0shm.ic"
+#endif
+
+#endif
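A minimal sketch (not part of the header above) of the intended create/map/unmap/free sequence for a shared memory area; the size, area name and helper name are illustrative assumptions.

static void
os_shm_usage_sketch(void)
{
	os_shm_t	shm;
	void*		addr;

	shm = os_shm_create(1024 * 1024, (char*)"ib_sketch_shm");

	if (shm == NULL) {

		return;
	}

	addr = os_shm_map(shm);		/* map into this process */

	if (addr != NULL) {
		/* ... read and write the shared area here ... */

		os_shm_unmap(addr);
	}

	/* freeing is allowed only after every process has unmapped the area */
	os_shm_free(shm);
}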
diff --git a/innobase/include/os0shm.ic b/innobase/include/os0shm.ic
new file mode 100644
index 00000000000..cc267544bc9
--- /dev/null
+++ b/innobase/include/os0shm.ic
@@ -0,0 +1,10 @@
+/******************************************************
+The interface to the operating system
+shared memory primitives
+
+(c) 1995 Innobase Oy
+
+Created 9/23/1995 Heikki Tuuri
+*******************************************************/
+
+
diff --git a/innobase/include/os0sync.h b/innobase/include/os0sync.h
new file mode 100644
index 00000000000..dcf519fdb9d
--- /dev/null
+++ b/innobase/include/os0sync.h
@@ -0,0 +1,198 @@
+/******************************************************
+The interface to the operating system
+synchronization primitives.
+
+(c) 1995 Innobase Oy
+
+Created 9/6/1995 Heikki Tuuri
+*******************************************************/
+#ifndef os0sync_h
+#define os0sync_h
+
+#include "univ.i"
+
+#ifdef __WIN__
+
+#include <windows.h>
+typedef CRITICAL_SECTION os_fast_mutex_t;
+typedef void* os_event_t;
+
+#else
+
+typedef pthread_mutex_t os_fast_mutex_t;
+struct os_event_struct {
+ os_fast_mutex_t os_mutex; /* this mutex protects the next
+ fields */
+ ibool is_set; /* this is TRUE if the next mutex is
+ not reserved */
+ os_fast_mutex_t wait_mutex; /* this mutex is used in waiting for
+ the event */
+};
+typedef struct os_event_struct os_event_struct_t;
+typedef os_event_struct_t* os_event_t;
+#endif
+
+typedef struct os_mutex_struct os_mutex_str_t;
+typedef os_mutex_str_t* os_mutex_t;
+
+#define OS_SYNC_INFINITE_TIME ((ulint)(-1))
+
+#define OS_SYNC_TIME_EXCEEDED 1
+
+/*************************************************************
+Creates an event semaphore, i.e., a semaphore which may
+have just two states: signaled and nonsignaled.
+The created event is manual reset: it must be reset
+explicitly by calling os_event_reset. */
+
+os_event_t
+os_event_create(
+/*============*/
+ /* out: the event handle */
+ char* name); /* in: the name of the event, if NULL
+ the event is created without a name */
+/*************************************************************
+Creates an auto-reset event semaphore, i.e., an event
+which is automatically reset when a single thread is
+released. */
+
+os_event_t
+os_event_create_auto(
+/*=================*/
+ /* out: the event handle */
+ char* name); /* in: the name of the event, if NULL
+ the event is created without a name */
+/**************************************************************
+Sets an event semaphore to the signaled state: lets waiting threads
+proceed. */
+
+void
+os_event_set(
+/*=========*/
+ os_event_t event); /* in: event to set */
+/**************************************************************
+Resets an event semaphore to the nonsignaled state. Threads that then wait
+for the event will block until it is set again. */
+
+void
+os_event_reset(
+/*===========*/
+ os_event_t event); /* in: event to reset */
+/**************************************************************
+Frees an event object. */
+
+void
+os_event_free(
+/*==========*/
+ os_event_t event); /* in: event to free */
+/**************************************************************
+Waits for an event object until it is in the signaled state. */
+
+void
+os_event_wait(
+/*==========*/
+ os_event_t event); /* in: event to wait */
+/**************************************************************
+Waits for an event object until it is in the signaled state or
+a timeout is exceeded. */
+
+ulint
+os_event_wait_time(
+/*===============*/
+ /* out: 0 if success,
+ OS_SYNC_TIME_EXCEEDED if timeout
+ was exceeded */
+ os_event_t event, /* in: event to wait */
+ ulint time); /* in: timeout in microseconds, or
+ OS_SYNC_INFINITE_TIME */
+/**************************************************************
+Waits for any event in an event array. Returns as soon as at least one
+of the events is signaled or becomes signaled. */
+
+ulint
+os_event_wait_multiple(
+/*===================*/
+ /* out: index of the event
+ which was signaled */
+ ulint n, /* in: number of events in the
+ array */
+ os_event_t* event_array); /* in: pointer to an array of event
+ handles */
+/*************************************************************
+Creates an operating system mutex semaphore.
+Because these are slow, the mutex semaphore of the database
+itself (sync_mutex_t) should be used where possible. */
+
+os_mutex_t
+os_mutex_create(
+/*============*/
+ /* out: the mutex handle */
+ char* name); /* in: the name of the mutex, if NULL
+ the mutex is created without a name */
+/**************************************************************
+Acquires ownership of a mutex semaphore. */
+
+void
+os_mutex_enter(
+/*===========*/
+ os_mutex_t mutex); /* in: mutex to acquire */
+/**************************************************************
+Releases ownership of a mutex. */
+
+void
+os_mutex_exit(
+/*==========*/
+ os_mutex_t mutex); /* in: mutex to release */
+/**************************************************************
+Frees a mutex object. */
+
+void
+os_mutex_free(
+/*==========*/
+ os_mutex_t mutex); /* in: mutex to free */
+#ifndef _WIN32
+/**************************************************************
+Acquires ownership of a fast mutex. */
+UNIV_INLINE
+ulint
+os_fast_mutex_trylock(
+/*==================*/
+ /* out: 0 if success, != 0 if
+ was reserved by another
+ thread */
+ os_fast_mutex_t* fast_mutex); /* in: mutex to acquire */
+/**************************************************************
+Releases ownership of a fast mutex. */
+UNIV_INLINE
+void
+os_fast_mutex_unlock(
+/*=================*/
+ os_fast_mutex_t* fast_mutex); /* in: mutex to release */
+/*************************************************************
+Initializes an operating system fast mutex semaphore. */
+
+void
+os_fast_mutex_init(
+/*===============*/
+ os_fast_mutex_t* fast_mutex); /* in: fast mutex */
+/**************************************************************
+Acquires ownership of a fast mutex. */
+
+void
+os_fast_mutex_lock(
+/*===============*/
+ os_fast_mutex_t* fast_mutex); /* in: mutex to acquire */
+/**************************************************************
+Frees a mutex object. */
+
+void
+os_fast_mutex_free(
+/*===============*/
+ os_fast_mutex_t* fast_mutex); /* in: mutex to free */
+#endif
+
+#ifndef UNIV_NONINL
+#include "os0sync.ic"
+#endif
+
+#endif
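A minimal sketch (not part of the header above) of the event calls; the timeout value and helper name are illustrative assumptions. In real use another thread would call os_event_set to release the waiter.

static void
os_event_usage_sketch(void)
{
	os_event_t	event;
	ulint		ret;

	event = os_event_create(NULL);	/* unnamed, manual-reset event */

	os_event_reset(event);		/* make sure it starts nonsignaled */

	/* wait with a timeout so that this sketch cannot block forever */
	ret = os_event_wait_time(event, 100000);

	if (ret == OS_SYNC_TIME_EXCEEDED) {
		/* nobody set the event within 100000 microseconds */
	}

	os_event_free(event);
}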
diff --git a/innobase/include/os0sync.ic b/innobase/include/os0sync.ic
new file mode 100644
index 00000000000..d82f38483e3
--- /dev/null
+++ b/innobase/include/os0sync.ic
@@ -0,0 +1,56 @@
+/******************************************************
+The interface to the operating system synchronization primitives.
+
+(c) 1995 Innobase Oy
+
+Created 9/6/1995 Heikki Tuuri
+*******************************************************/
+
+#ifdef __WIN__
+#include <winbase.h>
+#endif
+
+#ifndef _WIN32
+/**************************************************************
+Acquires ownership of a fast mutex. */
+UNIV_INLINE
+ulint
+os_fast_mutex_trylock(
+/*==================*/
+ /* out: 0 if success, != 0 if
+ was reserved by another
+ thread */
+ os_fast_mutex_t* fast_mutex) /* in: mutex to acquire */
+{
+#ifdef __WIN__
+ int ret;
+
+ /* TryEnterCriticalSection is probably not found from
+ NT versions < 4! */
+ ret = TryEnterCriticalSection(fast_mutex);
+
+ if (ret) {
+ return(0);
+ }
+
+ return(1);
+#else
+ return((ulint) pthread_mutex_trylock(fast_mutex));
+#endif
+}
+
+/**************************************************************
+Releases ownership of a fast mutex. */
+UNIV_INLINE
+void
+os_fast_mutex_unlock(
+/*=================*/
+ os_fast_mutex_t* fast_mutex) /* in: mutex to release */
+{
+#ifdef __WIN__
+ LeaveCriticalSection(fast_mutex);
+#else
+ pthread_mutex_unlock(fast_mutex);
+#endif
+}
+#endif
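A minimal sketch (not part of the file above) of the fast mutex calls declared in os0sync.h; the helper name is illustrative. As the comment above notes, on Windows the trylock relies on TryEnterCriticalSection, which may be missing on NT versions before 4.

static void
os_fast_mutex_usage_sketch(void)
{
	os_fast_mutex_t	mutex;

	os_fast_mutex_init(&mutex);

	if (os_fast_mutex_trylock(&mutex) == 0) {
		/* acquired without blocking */
		os_fast_mutex_unlock(&mutex);
	} else {
		os_fast_mutex_lock(&mutex);	/* block until it is free */
		os_fast_mutex_unlock(&mutex);
	}

	os_fast_mutex_free(&mutex);
}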
diff --git a/innobase/include/os0thread.h b/innobase/include/os0thread.h
new file mode 100644
index 00000000000..2b2d9fb4bd6
--- /dev/null
+++ b/innobase/include/os0thread.h
@@ -0,0 +1,121 @@
+/******************************************************
+The interface to the operating system
+process and thread control primitives
+
+(c) 1995 Innobase Oy
+
+Created 9/8/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef os0thread_h
+#define os0thread_h
+
+#include "univ.i"
+
+/* Maximum number of threads which can be created in the program */
+#define OS_THREAD_MAX_N 1000
+
+/* Possible fixed priorities for threads */
+#define OS_THREAD_PRIORITY_NONE 100
+#define OS_THREAD_PRIORITY_BACKGROUND 1
+#define OS_THREAD_PRIORITY_NORMAL 2
+#define OS_THREAD_PRIORITY_ABOVE_NORMAL 3
+
+#ifdef __WIN__
+typedef void* os_thread_t;
+#else
+typedef pthread_t os_thread_t;
+#endif
+typedef unsigned long int os_thread_id_t;
+
+/********************************************************************
+Creates a new thread of execution. The execution starts from
+the function given. The start function takes a void* parameter
+and returns a ulint. */
+
+os_thread_t
+os_thread_create(
+/*=============*/
+ /* out: handle to the thread */
+ ulint (*start_f)(void*), /* in: pointer to function
+ from which to start */
+ void* arg, /* in: argument to start
+ function */
+ os_thread_id_t* thread_id); /* out: id of created
+ thread */
+/*********************************************************************
+A thread calling this function ends its execution. */
+
+void
+os_thread_exit(
+/*===========*/
+ ulint code); /* in: exit code */
+/*********************************************************************
+Returns the thread identifier of the current thread. */
+
+os_thread_id_t
+os_thread_get_curr_id(void);
+/*========================*/
+/*********************************************************************
+Returns handle to the current thread. */
+
+os_thread_t
+os_thread_get_curr(void);
+/*====================*/
+/*********************************************************************
+Converts a thread id to a ulint. */
+
+ulint
+os_thread_conv_id_to_ulint(
+/*=======================*/
+ /* out: converted to ulint */
+ os_thread_id_t id); /* in: thread id */
+/*********************************************************************
+Waits for a thread to terminate. */
+
+void
+os_thread_wait(
+/*===========*/
+ os_thread_t thread); /* in: thread to wait */
+/*********************************************************************
+Advises the OS to give up the remainder of the thread's time slice. */
+
+void
+os_thread_yield(void);
+/*=================*/
+/*********************************************************************
+The thread sleeps for at least the time given in microseconds. */
+
+void
+os_thread_sleep(
+/*============*/
+ ulint tm); /* in: time in microseconds */
+/**********************************************************************
+Gets a thread priority. */
+
+ulint
+os_thread_get_priority(
+/*===================*/
+ /* out: priority */
+ os_thread_t handle);/* in: OS handle to the thread */
+/**********************************************************************
+Sets a thread priority. */
+
+void
+os_thread_set_priority(
+/*===================*/
+ os_thread_t handle, /* in: OS handle to the thread */
+	ulint		pri);	/* in: priority: one of OS_THREAD_PRIORITY_... */
+/**********************************************************************
+Gets the last operating system error code for the calling thread. */
+
+ulint
+os_thread_get_last_error(void);
+/*==========================*/
+
+
+#ifndef UNIV_NONINL
+#include "os0thread.ic"
+#endif
+
+#endif
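A minimal sketch (not part of the header above) of creating a thread and waiting for it to terminate; the start function and helper names are illustrative assumptions.

static ulint
sketch_thread_main(void* arg)
{
	(void) arg;			/* unused in this sketch */

	os_thread_sleep(1000);		/* sleep at least 1000 microseconds */

	return(0);
}

static void
os_thread_usage_sketch(void)
{
	os_thread_t	handle;
	os_thread_id_t	id;

	handle = os_thread_create(sketch_thread_main, NULL, &id);

	os_thread_wait(handle);		/* wait until the thread terminates */
}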
diff --git a/innobase/include/os0thread.ic b/innobase/include/os0thread.ic
new file mode 100644
index 00000000000..a75aa3abb34
--- /dev/null
+++ b/innobase/include/os0thread.ic
@@ -0,0 +1,8 @@
+/******************************************************
+The interface to the operating system
+process and thread control primitives
+
+(c) 1995 Innobase Oy
+
+Created 9/8/1995 Heikki Tuuri
+*******************************************************/
diff --git a/innobase/include/page0cur.h b/innobase/include/page0cur.h
new file mode 100644
index 00000000000..144e0e02b21
--- /dev/null
+++ b/innobase/include/page0cur.h
@@ -0,0 +1,263 @@
+/************************************************************************
+The page cursor
+
+(c) 1994-1996 Innobase Oy
+
+Created 10/4/1994 Heikki Tuuri
+*************************************************************************/
+
+#ifndef page0cur_h
+#define page0cur_h
+
+#include "univ.i"
+
+#include "page0types.h"
+#include "page0page.h"
+#include "rem0rec.h"
+#include "data0data.h"
+#include "mtr0mtr.h"
+
+
+#define PAGE_CUR_ADAPT
+
+/* Page cursor search modes; the values must be in this order! */
+
+#define PAGE_CUR_G 1
+#define PAGE_CUR_GE 2
+#define PAGE_CUR_L 3
+#define PAGE_CUR_LE 4
+#define PAGE_CUR_DBG 5
+
+extern ulint page_cur_short_succ;
+
+/*************************************************************
+Gets pointer to the page frame where the cursor is positioned. */
+UNIV_INLINE
+page_t*
+page_cur_get_page(
+/*==============*/
+ /* out: page */
+ page_cur_t* cur); /* in: page cursor */
+/*************************************************************
+Gets the record where the cursor is positioned. */
+UNIV_INLINE
+rec_t*
+page_cur_get_rec(
+/*=============*/
+ /* out: record */
+ page_cur_t* cur); /* in: page cursor */
+/*************************************************************
+Sets the cursor object to point before the first user record
+on the page. */
+UNIV_INLINE
+void
+page_cur_set_before_first(
+/*======================*/
+ page_t* page, /* in: index page */
+ page_cur_t* cur); /* in: cursor */
+/*************************************************************
+Sets the cursor object to point after the last user record on
+the page. */
+UNIV_INLINE
+void
+page_cur_set_after_last(
+/*====================*/
+ page_t* page, /* in: index page */
+ page_cur_t* cur); /* in: cursor */
+/*************************************************************
+Returns TRUE if the cursor is before first user record on page. */
+UNIV_INLINE
+ibool
+page_cur_is_before_first(
+/*=====================*/
+ /* out: TRUE if at start */
+ page_cur_t* cur); /* in: cursor */
+/*************************************************************
+Returns TRUE if the cursor is after last user record. */
+UNIV_INLINE
+ibool
+page_cur_is_after_last(
+/*===================*/
+ /* out: TRUE if at end */
+ page_cur_t* cur); /* in: cursor */
+/**************************************************************
+Positions the cursor on the given record. */
+UNIV_INLINE
+void
+page_cur_position(
+/*==============*/
+ rec_t* rec, /* in: record on a page */
+ page_cur_t* cur); /* in: page cursor */
+/**************************************************************
+Invalidates a page cursor by setting the record pointer NULL. */
+UNIV_INLINE
+void
+page_cur_invalidate(
+/*================*/
+ page_cur_t* cur); /* in: page cursor */
+/**************************************************************
+Moves the cursor to the next record on page. */
+UNIV_INLINE
+void
+page_cur_move_to_next(
+/*==================*/
+ page_cur_t* cur); /* in: cursor; must not be after last */
+/**************************************************************
+Moves the cursor to the previous record on page. */
+UNIV_INLINE
+void
+page_cur_move_to_prev(
+/*==================*/
+	page_cur_t*	cur);	/* in: cursor; must not be before first */
+/***************************************************************
+Inserts a record next to page cursor. Returns pointer to inserted record if
+succeed, i.e., enough space available, NULL otherwise. The cursor stays at
+the same position. */
+UNIV_INLINE
+rec_t*
+page_cur_tuple_insert(
+/*==================*/
+ /* out: pointer to record if succeed, NULL
+ otherwise */
+ page_cur_t* cursor, /* in: a page cursor */
+ dtuple_t* tuple, /* in: pointer to a data tuple */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/***************************************************************
+Inserts a record next to page cursor. Returns pointer to inserted record if
+succeed, i.e., enough space available, NULL otherwise. The cursor stays at
+the same position. */
+UNIV_INLINE
+rec_t*
+page_cur_rec_insert(
+/*================*/
+ /* out: pointer to record if succeed, NULL
+ otherwise */
+ page_cur_t* cursor, /* in: a page cursor */
+ rec_t* rec, /* in: record to insert */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/***************************************************************
+Inserts a record next to page cursor. Returns pointer to inserted record if
+succeed, i.e., enough space available, NULL otherwise. The record to be
+inserted can be given either as a data tuple or as a physical record; the
+other parameter must then be NULL. The cursor stays at the same position. */
+
+rec_t*
+page_cur_insert_rec_low(
+/*====================*/
+ /* out: pointer to record if succeed, NULL
+ otherwise */
+ page_cur_t* cursor, /* in: a page cursor */
+ dtuple_t* tuple, /* in: pointer to a data tuple or NULL */
+ ulint data_size,/* in: data size of tuple */
+ rec_t* rec, /* in: pointer to a physical record or NULL */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/*****************************************************************
+Copies records from page to a newly created page, from a given record onward,
+including that record. Infimum and supremum records are not copied. */
+
+void
+page_copy_rec_list_end_to_created_page(
+/*===================================*/
+ page_t* new_page, /* in: index page to copy to */
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: first record to copy */
+ mtr_t* mtr); /* in: mtr */
+/***************************************************************
+Deletes a record at the page cursor. The cursor is moved to the
+next record after the deleted one. */
+
+void
+page_cur_delete_rec(
+/*================*/
+ page_cur_t* cursor, /* in: a page cursor */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/********************************************************************
+Searches the right position for a page cursor. */
+UNIV_INLINE
+ulint
+page_cur_search(
+/*============*/
+ /* out: number of matched fields on the left */
+ page_t* page, /* in: index page */
+ dtuple_t* tuple, /* in: data tuple */
+ ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
+ or PAGE_CUR_GE */
+ page_cur_t* cursor);/* out: page cursor */
+/********************************************************************
+Searches the right position for a page cursor. */
+
+void
+page_cur_search_with_match(
+/*=======================*/
+ page_t* page, /* in: index page */
+ dtuple_t* tuple, /* in: data tuple */
+ ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
+ or PAGE_CUR_GE */
+ ulint* iup_matched_fields,
+ /* in/out: already matched fields in upper
+ limit record */
+ ulint* iup_matched_bytes,
+ /* in/out: already matched bytes in a field
+ not yet completely matched */
+ ulint* ilow_matched_fields,
+ /* in/out: already matched fields in lower
+ limit record */
+ ulint* ilow_matched_bytes,
+ /* in/out: already matched bytes in a field
+ not yet completely matched */
+ page_cur_t* cursor); /* out: page cursor */
+/***************************************************************
+Positions a page cursor on a randomly chosen user record on a page. If there
+are no user records, sets the cursor on the infimum record. */
+
+void
+page_cur_open_on_rnd_user_rec(
+/*==========================*/
+ page_t* page, /* in: page */
+ page_cur_t* cursor);/* in/out: page cursor */
+/***************************************************************
+Parses a log record of a record insert on a page. */
+
+byte*
+page_cur_parse_insert_rec(
+/*======================*/
+ /* out: end of log record or NULL */
+ ibool is_short,/* in: TRUE if short inserts */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
+/**************************************************************
+Parses a log record of copying a record list end to a newly created page. */
+
+byte*
+page_parse_copy_rec_list_to_created_page(
+/*=====================================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
+/***************************************************************
+Parses log record of a record delete on a page. */
+
+byte*
+page_cur_parse_delete_rec(
+/*======================*/
+ /* out: pointer to record end or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
+
+/* Index page cursor */
+
+struct page_cur_struct{
+ byte* rec; /* pointer to a record on page */
+};
+
+#ifndef UNIV_NONINL
+#include "page0cur.ic"
+#endif
+
+#endif
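A minimal sketch (not part of the header above) of iterating over the user records on a page with a page cursor; the helper name is illustrative.

static ulint
page_cur_count_user_recs_sketch(page_t* page)
{
	page_cur_t	cur;
	ulint		n = 0;

	page_cur_set_before_first(page, &cur);	/* position on the infimum */

	page_cur_move_to_next(&cur);		/* first user record, or the
						supremum if the page is empty */

	while (!page_cur_is_after_last(&cur)) {
		n++;

		page_cur_move_to_next(&cur);
	}

	return(n);
}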
diff --git a/innobase/include/page0cur.ic b/innobase/include/page0cur.ic
new file mode 100644
index 00000000000..4313036adaf
--- /dev/null
+++ b/innobase/include/page0cur.ic
@@ -0,0 +1,221 @@
+/************************************************************************
+The page cursor
+
+(c) 1994-1996 Innobase Oy
+
+Created 10/4/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "page0page.h"
+
+
+/*************************************************************
+Gets pointer to the page frame where the cursor is positioned. */
+UNIV_INLINE
+page_t*
+page_cur_get_page(
+/*==============*/
+ /* out: page */
+ page_cur_t* cur) /* in: page cursor */
+{
+ ut_ad(cur);
+
+ return(buf_frame_align(cur->rec));
+}
+
+/*************************************************************
+Gets the record where the cursor is positioned. */
+UNIV_INLINE
+rec_t*
+page_cur_get_rec(
+/*=============*/
+ /* out: record */
+ page_cur_t* cur) /* in: page cursor */
+{
+ ut_ad(cur);
+
+ return(cur->rec);
+}
+
+/*************************************************************
+Sets the cursor object to point before the first user record
+on the page. */
+UNIV_INLINE
+void
+page_cur_set_before_first(
+/*======================*/
+ page_t* page, /* in: index page */
+ page_cur_t* cur) /* in: cursor */
+{
+ cur->rec = page_get_infimum_rec(page);
+}
+
+/*************************************************************
+Sets the cursor object to point after the last user record on
+the page. */
+UNIV_INLINE
+void
+page_cur_set_after_last(
+/*====================*/
+ page_t* page, /* in: index page */
+ page_cur_t* cur) /* in: cursor */
+{
+ cur->rec = page_get_supremum_rec(page);
+}
+
+/*************************************************************
+Returns TRUE if the cursor is before first user record on page. */
+UNIV_INLINE
+ibool
+page_cur_is_before_first(
+/*=====================*/
+ /* out: TRUE if at start */
+ page_cur_t* cur) /* in: cursor */
+{
+ if (page_get_infimum_rec(page_cur_get_page(cur)) == cur->rec) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/*************************************************************
+Returns TRUE if the cursor is after last user record. */
+UNIV_INLINE
+ibool
+page_cur_is_after_last(
+/*===================*/
+ /* out: TRUE if at end */
+ page_cur_t* cur) /* in: cursor */
+{
+ if (page_get_supremum_rec(page_cur_get_page(cur)) == cur->rec) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/**************************************************************
+Positions the cursor on the given record. */
+UNIV_INLINE
+void
+page_cur_position(
+/*==============*/
+ rec_t* rec, /* in: record on a page */
+ page_cur_t* cur) /* in: page cursor */
+{
+ ut_ad(rec && cur);
+
+ cur->rec = rec;
+}
+
+/**************************************************************
+Invalidates a page cursor by setting the record pointer NULL. */
+UNIV_INLINE
+void
+page_cur_invalidate(
+/*================*/
+ page_cur_t* cur) /* in: page cursor */
+{
+ ut_ad(cur);
+
+ cur->rec = NULL;
+}
+
+/**************************************************************
+Moves the cursor to the next record on page. */
+UNIV_INLINE
+void
+page_cur_move_to_next(
+/*==================*/
+ page_cur_t* cur) /* in: cursor; must not be after last */
+{
+ ut_ad(!page_cur_is_after_last(cur));
+
+ cur->rec = page_rec_get_next(cur->rec);
+}
+
+/**************************************************************
+Moves the cursor to the previous record on page. */
+UNIV_INLINE
+void
+page_cur_move_to_prev(
+/*==================*/
+	page_cur_t*	cur)	/* in: cursor; must not be before first */
+{
+ ut_ad(!page_cur_is_before_first(cur));
+
+ cur->rec = page_rec_get_prev(cur->rec);
+}
+
+/********************************************************************
+Searches the right position for a page cursor. */
+UNIV_INLINE
+ulint
+page_cur_search(
+/*============*/
+ /* out: number of matched fields on the left */
+ page_t* page, /* in: index page */
+ dtuple_t* tuple, /* in: data tuple */
+ ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
+ or PAGE_CUR_GE */
+ page_cur_t* cursor) /* out: page cursor */
+{
+ ulint low_matched_fields = 0;
+ ulint low_matched_bytes = 0;
+ ulint up_matched_fields = 0;
+ ulint up_matched_bytes = 0;
+
+ ut_ad(dtuple_check_typed(tuple));
+
+ page_cur_search_with_match(page, tuple, mode,
+ &low_matched_fields,
+ &low_matched_bytes,
+ &up_matched_fields,
+ &up_matched_bytes,
+ cursor);
+ return(low_matched_fields);
+}
+
+/***************************************************************
+Inserts a record next to page cursor. Returns pointer to inserted record if
+succeed, i.e., enough space available, NULL otherwise. The cursor stays at
+the same position. */
+UNIV_INLINE
+rec_t*
+page_cur_tuple_insert(
+/*==================*/
+ /* out: pointer to record if succeed, NULL
+ otherwise */
+ page_cur_t* cursor, /* in: a page cursor */
+ dtuple_t* tuple, /* in: pointer to a data tuple */
+ mtr_t* mtr) /* in: mini-transaction handle */
+{
+ ulint data_size;
+
+ ut_ad(dtuple_check_typed(tuple));
+
+ data_size = dtuple_get_data_size(tuple);
+
+ return(page_cur_insert_rec_low(cursor, tuple, data_size, NULL, mtr));
+}
+
+/***************************************************************
+Inserts a record next to page cursor. Returns pointer to inserted record if
+succeed, i.e., enough space available, NULL otherwise. The cursor stays at
+the same position. */
+UNIV_INLINE
+rec_t*
+page_cur_rec_insert(
+/*================*/
+ /* out: pointer to record if succeed, NULL
+ otherwise */
+ page_cur_t* cursor, /* in: a page cursor */
+ rec_t* rec, /* in: record to insert */
+ mtr_t* mtr) /* in: mini-transaction handle */
+{
+ return(page_cur_insert_rec_low(cursor, NULL, 0, rec, mtr));
+}
+
diff --git a/innobase/include/page0page.h b/innobase/include/page0page.h
new file mode 100644
index 00000000000..8e68381b868
--- /dev/null
+++ b/innobase/include/page0page.h
@@ -0,0 +1,697 @@
+/******************************************************
+Index page routines
+
+(c) 1994-1996 Innobase Oy
+
+Created 2/2/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef page0page_h
+#define page0page_h
+
+#include "univ.i"
+
+#include "page0types.h"
+#include "fil0fil.h"
+#include "buf0buf.h"
+#include "data0data.h"
+#include "dict0dict.h"
+#include "rem0rec.h"
+#include "fsp0fsp.h"
+#include "mtr0mtr.h"
+
+#ifdef UNIV_MATERIALIZE
+#undef UNIV_INLINE
+#define UNIV_INLINE
+#endif
+
+/* PAGE HEADER
+ ===========
+
+Index page header starts at the first offset left free by the FIL-module */
+
+typedef byte page_header_t;
+
+#define PAGE_HEADER FSEG_PAGE_DATA /* index page header starts at this
+ offset */
+/*-----------------------------*/
+#define PAGE_N_DIR_SLOTS 0 /* number of slots in page directory */
+#define PAGE_HEAP_TOP 2 /* pointer to record heap top */
+#define PAGE_N_HEAP 4 /* number of records in the heap */
+#define PAGE_FREE 6 /* pointer to start of page free record list */
+#define PAGE_GARBAGE 8 /* number of bytes in deleted records */
+#define PAGE_LAST_INSERT 10 /* pointer to the last inserted record, or
+ NULL if this info has been reset by a delete,
+ for example */
+#define PAGE_DIRECTION 12 /* last insert direction: PAGE_LEFT, ... */
+#define PAGE_N_DIRECTION 14 /* number of consecutive inserts to the same
+ direction */
+#define PAGE_N_RECS 16 /* number of user records on the page */
+#define PAGE_MAX_TRX_ID 18 /* highest id of a trx which may have modified
+ a record on the page; a dulint; defined only
+ in secondary indexes; specifically, not in an
+ ibuf tree; NOTE: this may be modified only
+ when the thread has an x-latch to the page,
+ and ALSO an x-latch to btr_search_latch
+ if there is a hash index to the page! */
+#define PAGE_HEADER_PRIV_END 26 /* end of private data structure of the page
+ header which are set in a page create */
+/*----*/
+#define PAGE_LEVEL 26 /* level of the node in an index tree; the
+ leaf level is the level 0 */
+#define PAGE_INDEX_ID 28 /* index id where the page belongs */
+#define PAGE_BTR_SEG_LEAF 36 /* file segment header for the leaf pages in
+ a B-tree: defined only on the root page of a
+ B-tree, but not in the root of an ibuf tree */
+#define PAGE_BTR_IBUF_FREE_LIST PAGE_BTR_SEG_LEAF
+#define PAGE_BTR_IBUF_FREE_LIST_NODE PAGE_BTR_SEG_LEAF
+ /* in the place of PAGE_BTR_SEG_LEAF and _TOP
+ there is a free list base node if the page is
+ the root page of an ibuf tree, and at the same
+ place is the free list node if the page is in
+ a free list */
+#define PAGE_BTR_SEG_TOP (36 + FSEG_HEADER_SIZE)
+ /* file segment header for the non-leaf pages
+ in a B-tree: defined only on the root page of
+ a B-tree, but not in the root of an ibuf
+ tree */
+/*----*/
+#define PAGE_DATA (PAGE_HEADER + 36 + 2 * FSEG_HEADER_SIZE)
+ /* start of data on the page */
+
+#define PAGE_INFIMUM (PAGE_DATA + 1 + REC_N_EXTRA_BYTES)
+ /* offset of the page infimum record on the
+ page */
+#define PAGE_SUPREMUM (PAGE_DATA + 2 + 2 * REC_N_EXTRA_BYTES + 8)
+ /* offset of the page supremum record on the
+ page */
+#define PAGE_SUPREMUM_END (PAGE_SUPREMUM + 9)
+ /* offset of the page supremum record end on
+ the page */
+/*-----------------------------*/
+
+/* Directions of cursor movement */
+#define PAGE_LEFT 1
+#define PAGE_RIGHT 2
+#define PAGE_SAME_REC 3
+#define PAGE_SAME_PAGE 4
+#define PAGE_NO_DIRECTION 5
+
+/* PAGE DIRECTORY
+ ==============
+*/
+
+typedef byte page_dir_slot_t;
+typedef page_dir_slot_t page_dir_t;
+
+/* Offset of the directory start down from the page end. We call the
+slot with the highest file address directory start, as it points to
+the first record in the list of records. */
+#define PAGE_DIR FIL_PAGE_DATA_END
+
+/* We define a slot in the page directory as two bytes */
+#define PAGE_DIR_SLOT_SIZE 2
+
+/* The offset of the physically lower end of the directory, counted from
+page end, when the page is empty */
+#define PAGE_EMPTY_DIR_START (PAGE_DIR + 2 * PAGE_DIR_SLOT_SIZE)
+
+/* The maximum and minimum number of records owned by a directory slot. The
+number may drop below the minimum in the first and the last slot in the
+directory. */
+#define PAGE_DIR_SLOT_MAX_N_OWNED 8
+#define PAGE_DIR_SLOT_MIN_N_OWNED 4
+
+/*****************************************************************
+Returns the max trx id field value. */
+UNIV_INLINE
+dulint
+page_get_max_trx_id(
+/*================*/
+ page_t* page); /* in: page */
+/*****************************************************************
+Sets the max trx id field value. */
+
+void
+page_set_max_trx_id(
+/*================*/
+ page_t* page, /* in: page */
+ dulint trx_id);/* in: transaction id */
+/*****************************************************************
+Sets the max trx id field value if trx_id is bigger than the previous
+value. */
+UNIV_INLINE
+void
+page_update_max_trx_id(
+/*===================*/
+ page_t* page, /* in: page */
+ dulint trx_id); /* in: transaction id */
+/*****************************************************************
+Reads the given header field. */
+UNIV_INLINE
+ulint
+page_header_get_field(
+/*==================*/
+ page_t* page, /* in: page */
+ ulint field); /* in: PAGE_N_DIR_SLOTS, ... */
+/*****************************************************************
+Sets the given header field. */
+UNIV_INLINE
+void
+page_header_set_field(
+/*==================*/
+ page_t* page, /* in: page */
+ ulint field, /* in: PAGE_N_DIR_SLOTS, ... */
+ ulint val); /* in: value */
+/*****************************************************************
+Returns the pointer stored in the given header field. */
+UNIV_INLINE
+byte*
+page_header_get_ptr(
+/*================*/
+ /* out: pointer or NULL */
+ page_t* page, /* in: page */
+ ulint field); /* in: PAGE_FREE, ... */
+/*****************************************************************
+Sets the pointer stored in the given header field. */
+UNIV_INLINE
+void
+page_header_set_ptr(
+/*================*/
+ page_t* page, /* in: page */
+ ulint field, /* in: PAGE_FREE, ... */
+ byte* ptr); /* in: pointer or NULL*/
+/*****************************************************************
+Resets the last insert info field in the page header. Writes to mlog
+about this operation. */
+UNIV_INLINE
+void
+page_header_reset_last_insert(
+/*==========================*/
+ page_t* page, /* in: page */
+ mtr_t* mtr); /* in: mtr */
+/****************************************************************
+Gets the first record on the page. */
+UNIV_INLINE
+rec_t*
+page_get_infimum_rec(
+/*=================*/
+ /* out: the first record in record list */
+ page_t* page); /* in: page which must have record(s) */
+/****************************************************************
+Gets the last record on the page. */
+UNIV_INLINE
+rec_t*
+page_get_supremum_rec(
+/*==================*/
+ /* out: the last record in record list */
+ page_t* page); /* in: page which must have record(s) */
+/****************************************************************
+Returns the middle record of the record list. If there is an even number
+of records in the list, returns the first record of the upper half-list. */
+
+rec_t*
+page_get_middle_rec(
+/*================*/
+ /* out: middle record */
+ page_t* page); /* in: page */
+/*****************************************************************
+Compares a data tuple to a physical record. Differs from the function
+cmp_dtuple_rec_with_match in the way that the record must reside on an
+index page, and also page infimum and supremum records can be given in
+the parameter rec. These are considered as the negative infinity and
+the positive infinity in the alphabetical order. */
+UNIV_INLINE
+int
+page_cmp_dtuple_rec_with_match(
+/*===========================*/
+ /* out: 1, 0, -1, if dtuple is greater, equal,
+ less than rec, respectively, when only the
+ common first fields are compared */
+ dtuple_t* dtuple, /* in: data tuple */
+ rec_t* rec, /* in: physical record on a page; may also
+ be page infimum or supremum, in which case
+ matched-parameter values below are not
+ affected */
+ ulint* matched_fields, /* in/out: number of already completely
+ matched fields; when function returns
+ contains the value for current comparison */
+ ulint* matched_bytes); /* in/out: number of already matched
+ bytes within the first field not completely
+ matched; when function returns contains the
+ value for current comparison */
+/*****************************************************************
+Gets the number of user records on page (the infimum and supremum records
+are not user records). */
+UNIV_INLINE
+ulint
+page_get_n_recs(
+/*============*/
+ /* out: number of user records */
+ page_t* page); /* in: index page */
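A minimal sketch (not part of this header) showing that the generic header accessor and the specialized getter above read the same field; the helper name is illustrative, and the assertion assumes the ut_ad debug macro available via univ.i.

static ulint
page_n_recs_sketch(page_t* page)
{
	ulint	n;

	n = page_header_get_field(page, PAGE_N_RECS);

	ut_ad(n == page_get_n_recs(page));	/* specialized getter, same value */

	return(n);
}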
+/*******************************************************************
+Returns the number of records before the given record in chain.
+The number includes infimum and supremum records. */
+
+ulint
+page_rec_get_n_recs_before(
+/*=======================*/
+ /* out: number of records */
+ rec_t* rec); /* in: the physical record */
+/*****************************************************************
+Gets the number of dir slots in directory. */
+UNIV_INLINE
+ulint
+page_dir_get_n_slots(
+/*=================*/
+ /* out: number of slots */
+ page_t* page); /* in: index page */
+/*****************************************************************
+Gets pointer to nth directory slot. */
+UNIV_INLINE
+page_dir_slot_t*
+page_dir_get_nth_slot(
+/*==================*/
+ /* out: pointer to dir slot */
+ page_t* page, /* in: index page */
+ ulint n); /* in: position */
+/******************************************************************
+Used to check the consistency of a record on a page. */
+UNIV_INLINE
+ibool
+page_rec_check(
+/*===========*/
+ /* out: TRUE if succeed */
+ rec_t* rec); /* in: record */
+/*******************************************************************
+Gets the record pointed to by a directory slot. */
+UNIV_INLINE
+rec_t*
+page_dir_slot_get_rec(
+/*==================*/
+ /* out: pointer to record */
+ page_dir_slot_t* slot); /* in: directory slot */
+/*******************************************************************
+This is used to set the record offset in a directory slot. */
+UNIV_INLINE
+void
+page_dir_slot_set_rec(
+/*==================*/
+ page_dir_slot_t* slot, /* in: directory slot */
+ rec_t* rec); /* in: record on the page */
+/*******************************************************************
+Gets the number of records owned by a directory slot. */
+UNIV_INLINE
+ulint
+page_dir_slot_get_n_owned(
+/*======================*/
+ /* out: number of records */
+ page_dir_slot_t* slot); /* in: page directory slot */
+/*******************************************************************
+This is used to set the owned records field of a directory slot. */
+UNIV_INLINE
+void
+page_dir_slot_set_n_owned(
+/*======================*/
+ page_dir_slot_t* slot, /* in: directory slot */
+ ulint n); /* in: number of records owned
+ by the slot */
+/****************************************************************
+Calculates the space reserved for directory slots of a given
+number of records. The exact value is a fraction number
+n * PAGE_DIR_SLOT_SIZE / PAGE_DIR_SLOT_MIN_N_OWNED, and it is
+rounded upwards to an integer. */
+UNIV_INLINE
+ulint
+page_dir_calc_reserved_space(
+/*=========================*/
+ ulint n_recs); /* in: number of records */
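A worked instance of the formula above, using the defines earlier in this header: with PAGE_DIR_SLOT_SIZE = 2 and PAGE_DIR_SLOT_MIN_N_OWNED = 4, a page with 101 records reserves ceil(101 * 2 / 4) = 51 bytes of directory space.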
+/*******************************************************************
+Looks for the directory slot which owns the given record. */
+UNIV_INLINE
+ulint
+page_dir_find_owner_slot(
+/*=====================*/
+ /* out: the directory slot number */
+ rec_t* rec); /* in: the physical record */
+/****************************************************************
+Gets the pointer to the next record on the page. */
+UNIV_INLINE
+rec_t*
+page_rec_get_next(
+/*==============*/
+ /* out: pointer to next record */
+ rec_t* rec); /* in: pointer to record, must not be page
+ supremum */
+/****************************************************************
+Sets the pointer to the next record on the page. */
+UNIV_INLINE
+void
+page_rec_set_next(
+/*==============*/
+ rec_t* rec, /* in: pointer to record, must not be
+ page supremum */
+ rec_t* next); /* in: pointer to next record, must not
+ be page infimum */
+/****************************************************************
+Gets the pointer to the previous record. */
+UNIV_INLINE
+rec_t*
+page_rec_get_prev(
+/*==============*/
+ /* out: pointer to previous record */
+ rec_t* rec); /* in: pointer to record, must not be page
+ infimum */
+/****************************************************************
+TRUE if the record is a user record on the page. */
+UNIV_INLINE
+ibool
+page_rec_is_user_rec(
+/*=================*/
+ /* out: TRUE if a user record */
+ rec_t* rec); /* in: record */
+/****************************************************************
+TRUE if the record is the supremum record on a page. */
+UNIV_INLINE
+ibool
+page_rec_is_supremum(
+/*=================*/
+ /* out: TRUE if the supremum record */
+ rec_t* rec); /* in: record */
+/****************************************************************
+TRUE if the record is the infimum record on a page. */
+UNIV_INLINE
+ibool
+page_rec_is_infimum(
+/*================*/
+ /* out: TRUE if the infimum record */
+ rec_t* rec); /* in: record */
+/****************************************************************
+TRUE if the record is the first user record on the page. */
+UNIV_INLINE
+ibool
+page_rec_is_first_user_rec(
+/*=======================*/
+ /* out: TRUE if first user record */
+ rec_t* rec); /* in: record */
+/****************************************************************
+TRUE if the record is the last user record on the page. */
+UNIV_INLINE
+ibool
+page_rec_is_last_user_rec(
+/*======================*/
+ /* out: TRUE if last user record */
+ rec_t* rec); /* in: record */
+/*******************************************************************
+Looks for the record which owns the given record. */
+UNIV_INLINE
+rec_t*
+page_rec_find_owner_rec(
+/*====================*/
+ /* out: the owner record */
+ rec_t* rec); /* in: the physical record */
+/***************************************************************************
+This is a low-level operation which is used during database index creation
+to update the page number of a created B-tree in a data dictionary
+record. */
+
+void
+page_rec_write_index_page_no(
+/*=========================*/
+ rec_t* rec, /* in: record to update */
+ ulint i, /* in: index of the field to update */
+ ulint page_no,/* in: value to write */
+ mtr_t* mtr); /* in: mtr */
+/****************************************************************
+Returns the maximum combined size of records which can be inserted on top
+of record heap. */
+UNIV_INLINE
+ulint
+page_get_max_insert_size(
+/*=====================*/
+ /* out: maximum combined size for inserted records */
+ page_t* page, /* in: index page */
+ ulint n_recs); /* in: number of records */
+/****************************************************************
+Returns the maximum combined size of records which can be inserted on top
+of record heap if page is first reorganized. */
+UNIV_INLINE
+ulint
+page_get_max_insert_size_after_reorganize(
+/*======================================*/
+ /* out: maximum combined size for inserted records */
+ page_t* page, /* in: index page */
+ ulint n_recs);/* in: number of records */
+/*****************************************************************
+Calculates free space if a page is emptied. */
+UNIV_INLINE
+ulint
+page_get_free_space_of_empty(void);
+/*==============================*/
+ /* out: free space */
+/****************************************************************
+Returns the sum of the sizes of the records in the record list
+excluding the infimum and supremum records. */
+UNIV_INLINE
+ulint
+page_get_data_size(
+/*===============*/
+ /* out: data in bytes */
+ page_t* page); /* in: index page */
+/****************************************************************
+Allocates a block of memory from an index page. */
+
+byte*
+page_mem_alloc(
+/*===========*/
+ /* out: pointer to start of allocated
+ buffer, or NULL if allocation fails */
+ page_t* page, /* in: index page */
+ ulint need, /* in: number of bytes needed */
+ ulint* heap_no);/* out: this contains the heap number
+ of the allocated record if allocation succeeds */
+/****************************************************************
+Puts a record to the free list. */
+UNIV_INLINE
+void
+page_mem_free(
+/*==========*/
+ page_t* page, /* in: index page */
+ rec_t* rec); /* in: pointer to the (origin of) record */
+/**************************************************************
+The index page creation function. */
+
+page_t*
+page_create(
+/*========*/
+ /* out: pointer to the page */
+ buf_frame_t* frame, /* in: a buffer frame where the page is
+ created */
+ mtr_t* mtr); /* in: mini-transaction handle */
+/*****************************************************************
+Differs from page_copy_rec_list_end because this function does not
+touch the lock table or the max trx id on the page. */
+
+void
+page_copy_rec_list_end_no_locks(
+/*============================*/
+ page_t* new_page, /* in: index page to copy to */
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: record on page */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Copies records from page to new_page, from the given record onward,
+including that record. Infimum and supremum records are not copied.
+The records are copied to the start of the record list on new_page. */
+
+void
+page_copy_rec_list_end(
+/*===================*/
+ page_t* new_page, /* in: index page to copy to */
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: record on page */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Copies records from page to new_page, up to the given record, NOT
+including that record. Infimum and supremum records are not copied.
+The records are copied to the end of the record list on new_page. */
+
+void
+page_copy_rec_list_start(
+/*=====================*/
+ page_t* new_page, /* in: index page to copy to */
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: record on page */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Deletes records from a page from a given record onward, including that record.
+The infimum and supremum records are not deleted. */
+
+void
+page_delete_rec_list_end(
+/*=====================*/
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: record on page */
+ ulint n_recs, /* in: number of records to delete, or ULINT_UNDEFINED
+ if not known */
+ ulint size, /* in: the sum of the sizes of the records in the end
+ of the chain to delete, or ULINT_UNDEFINED if not
+ known */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Deletes records from page, up to the given record, NOT including
+that record. Infimum and supremum records are not deleted. */
+
+void
+page_delete_rec_list_start(
+/*=======================*/
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: record on page */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Moves record list end to another page. Moved records include
+split_rec. */
+
+void
+page_move_rec_list_end(
+/*===================*/
+ page_t* new_page, /* in: index page where to move */
+ page_t* page, /* in: index page */
+ rec_t* split_rec, /* in: first record to move */
+ mtr_t* mtr); /* in: mtr */
+/*****************************************************************
+Moves record list start to another page. Moved records do not include
+split_rec. */
+
+void
+page_move_rec_list_start(
+/*=====================*/
+ page_t* new_page, /* in: index page where to move */
+ page_t* page, /* in: index page */
+ rec_t* split_rec, /* in: first record not to move */
+ mtr_t* mtr); /* in: mtr */
+/********************************************************************
+Splits a directory slot which owns too many records. */
+
+void
+page_dir_split_slot(
+/*================*/
+ page_t* page, /* in: the index page in question */
+ ulint slot_no); /* in: the directory slot */
+/*****************************************************************
+Tries to balance the given directory slot, which has too few records,
+with the upper neighbor, so that there are at least the minimum number
+of records owned by the slot; this may result in the merging of
+two slots. */
+
+void
+page_dir_balance_slot(
+/*==================*/
+ page_t* page, /* in: index page */
+ ulint slot_no); /* in: the directory slot */
+/**************************************************************
+Parses a log record of a record list end or start deletion. */
+
+byte*
+page_parse_delete_rec_list(
+/*=======================*/
+ /* out: end of log record or NULL */
+ byte type, /* in: MLOG_LIST_END_DELETE or MLOG_LIST_START_DELETE */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
+/***************************************************************
+Parses a redo log record of creating a page. */
+
+byte*
+page_parse_create(
+/*==============*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
+/****************************************************************
+Prints record contents including the data relevant only in
+the index page context. */
+
+void
+page_rec_print(
+/*===========*/
+ rec_t* rec);
+/*******************************************************************
+This is used to print the contents of the directory for
+debugging purposes. */
+
+void
+page_dir_print(
+/*===========*/
+ page_t* page, /* in: index page */
+ ulint pr_n); /* in: print n first and n last entries */
+/*******************************************************************
+This is used to print the contents of the page record list for
+debugging purposes. */
+
+void
+page_print_list(
+/*============*/
+ page_t* page, /* in: index page */
+ ulint pr_n); /* in: print n first and n last entries */
+/*******************************************************************
+Prints the info in a page header. */
+
+void
+page_header_print(
+/*==============*/
+ page_t* page);
+/*******************************************************************
+This is used to print the contents of the page for
+debugging purposes. */
+
+void
+page_print(
+/*======*/
+ page_t* page, /* in: index page */
+ ulint dn, /* in: print dn first and last entries in directory */
+ ulint rn); /* in: print rn first and last records on page */
+/*******************************************************************
+The following is used to validate a record on a page. This function
+differs from rec_validate as it can also check the n_owned field and
+the heap_no field. */
+
+ibool
+page_rec_validate(
+/*==============*/
+ /* out: TRUE if ok */
+ rec_t* rec); /* in: record on the page */
+/*******************************************************************
+This function checks the consistency of an index page. */
+
+ibool
+page_validate(
+/*==========*/
+ /* out: TRUE if ok */
+ page_t* page, /* in: index page */
+ dict_index_t* index); /* in: data dictionary index containing
+ the page record type definition */
+/*******************************************************************
+Looks in the page record list for a record with the given heap number. */
+
+rec_t*
+page_find_rec_with_heap_no(
+/*=======================*/
+ /* out: record, NULL if not found */
+ page_t* page, /* in: index page */
+ ulint heap_no);/* in: heap number */
+
+#ifdef UNIV_MATERIALIZE
+#undef UNIV_INLINE
+#define UNIV_INLINE UNIV_INLINE_ORIGINAL
+#endif
+
+#ifndef UNIV_NONINL
+#include "page0page.ic"
+#endif
+
+#endif
diff --git a/innobase/include/page0page.ic b/innobase/include/page0page.ic
new file mode 100644
index 00000000000..a029604c2bc
--- /dev/null
+++ b/innobase/include/page0page.ic
@@ -0,0 +1,772 @@
+/******************************************************
+Index page routines
+
+(c) 1994-1996 Innobase Oy
+
+Created 2/2/1994 Heikki Tuuri
+*******************************************************/
+
+#include "mach0data.h"
+#include "rem0cmp.h"
+#include "mtr0log.h"
+
+#ifdef UNIV_MATERIALIZE
+#undef UNIV_INLINE
+#define UNIV_INLINE
+#endif
+
+/*****************************************************************
+Returns the max trx id field value. */
+UNIV_INLINE
+dulint
+page_get_max_trx_id(
+/*================*/
+ page_t* page) /* in: page */
+{
+ ut_ad(page);
+
+ return(mach_read_from_8(page + PAGE_HEADER + PAGE_MAX_TRX_ID));
+}
+
+/*****************************************************************
+Sets the max trx id field value if trx_id is bigger than the previous
+value. */
+UNIV_INLINE
+void
+page_update_max_trx_id(
+/*===================*/
+ page_t* page, /* in: page */
+ dulint trx_id) /* in: transaction id */
+{
+ ut_ad(page);
+
+ if (ut_dulint_cmp(page_get_max_trx_id(page), trx_id) < 0) {
+
+ page_set_max_trx_id(page, trx_id);
+ }
+}
+
+/*****************************************************************
+Reads the given header field. */
+UNIV_INLINE
+ulint
+page_header_get_field(
+/*==================*/
+ page_t* page, /* in: page */
+ ulint field) /* in: PAGE_LEVEL, ... */
+{
+ ut_ad(page);
+ ut_ad(field <= PAGE_INDEX_ID);
+
+ return(mach_read_from_2(page + PAGE_HEADER + field));
+}
+
+/*****************************************************************
+Sets the given header field. */
+UNIV_INLINE
+void
+page_header_set_field(
+/*==================*/
+ page_t* page, /* in: page */
+ ulint field, /* in: PAGE_LEVEL, ... */
+ ulint val) /* in: value */
+{
+ ut_ad(page);
+ ut_ad(field <= PAGE_N_RECS);
+ ut_ad(val < UNIV_PAGE_SIZE);
+
+ mach_write_to_2(page + PAGE_HEADER + field, val);
+}
+
+/*****************************************************************
+Returns the pointer stored in the given header field. */
+UNIV_INLINE
+byte*
+page_header_get_ptr(
+/*================*/
+ /* out: pointer or NULL */
+ page_t* page, /* in: page */
+ ulint field) /* in: PAGE_FREE, ... */
+{
+ ulint offs;
+
+ ut_ad(page);
+ ut_ad((field == PAGE_FREE)
+ || (field == PAGE_LAST_INSERT)
+ || (field == PAGE_HEAP_TOP));
+
+ offs = page_header_get_field(page, field);
+
+ ut_ad((field != PAGE_HEAP_TOP) || offs);
+
+ if (offs == 0) {
+
+ return(NULL);
+ }
+
+ return(page + offs);
+}
+
+/*****************************************************************
+Sets the pointer stored in the given header field. */
+UNIV_INLINE
+void
+page_header_set_ptr(
+/*================*/
+ page_t* page, /* in: page */
+ ulint field, /* in: PAGE_FREE, ... */
+ byte* ptr) /* in: pointer or NULL*/
+{
+ ulint offs;
+
+ ut_ad(page);
+ ut_ad((field == PAGE_FREE)
+ || (field == PAGE_LAST_INSERT)
+ || (field == PAGE_HEAP_TOP));
+
+ if (ptr == NULL) {
+ offs = 0;
+ } else {
+ offs = ptr - page;
+ }
+
+ ut_ad((field != PAGE_HEAP_TOP) || offs);
+
+ page_header_set_field(page, field, offs);
+}
+
+/*****************************************************************
+Resets the last insert info field in the page header. Writes to mlog
+about this operation. */
+UNIV_INLINE
+void
+page_header_reset_last_insert(
+/*==========================*/
+ page_t* page, /* in: page */
+ mtr_t* mtr) /* in: mtr */
+{
+ ut_ad(page && mtr);
+
+ mlog_write_ulint(page + PAGE_HEADER + PAGE_LAST_INSERT, 0,
+ MLOG_2BYTES, mtr);
+}
+
+/****************************************************************
+Gets the first record on the page. */
+UNIV_INLINE
+rec_t*
+page_get_infimum_rec(
+/*=================*/
+ /* out: the first record in record list */
+ page_t* page) /* in: page which must have record(s) */
+{
+ ut_ad(page);
+
+ return(page + PAGE_INFIMUM);
+}
+
+/****************************************************************
+Gets the last record on the page. */
+UNIV_INLINE
+rec_t*
+page_get_supremum_rec(
+/*==================*/
+ /* out: the last record in record list */
+ page_t* page) /* in: page which must have record(s) */
+{
+ ut_ad(page);
+
+ return(page + PAGE_SUPREMUM);
+}
+
+/****************************************************************
+TRUE if the record is a user record on the page. */
+UNIV_INLINE
+ibool
+page_rec_is_user_rec(
+/*=================*/
+ /* out: TRUE if a user record */
+ rec_t* rec) /* in: record */
+{
+ ut_ad(rec);
+
+ if (rec == page_get_supremum_rec(buf_frame_align(rec))) {
+
+ return(FALSE);
+ }
+
+ if (rec == page_get_infimum_rec(buf_frame_align(rec))) {
+
+ return(FALSE);
+ }
+
+ return(TRUE);
+}
+
+/****************************************************************
+TRUE if the record is the supremum record on a page. */
+UNIV_INLINE
+ibool
+page_rec_is_supremum(
+/*=================*/
+ /* out: TRUE if the supremum record */
+ rec_t* rec) /* in: record */
+{
+ ut_ad(rec);
+
+ if (rec == page_get_supremum_rec(buf_frame_align(rec))) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/****************************************************************
+TRUE if the record is the infimum record on a page. */
+UNIV_INLINE
+ibool
+page_rec_is_infimum(
+/*================*/
+ /* out: TRUE if the infimum record */
+ rec_t* rec) /* in: record */
+{
+ ut_ad(rec);
+
+ if (rec == page_get_infimum_rec(buf_frame_align(rec))) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/****************************************************************
+TRUE if the record is the first user record on the page. */
+UNIV_INLINE
+ibool
+page_rec_is_first_user_rec(
+/*=======================*/
+ /* out: TRUE if first user record */
+ rec_t* rec) /* in: record */
+{
+ ut_ad(rec);
+
+ if (rec == page_get_supremum_rec(buf_frame_align(rec))) {
+
+ return(FALSE);
+ }
+
+ if (rec == page_rec_get_next(
+ page_get_infimum_rec(buf_frame_align(rec)))) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/****************************************************************
+TRUE if the record is the last user record on the page. */
+UNIV_INLINE
+ibool
+page_rec_is_last_user_rec(
+/*======================*/
+ /* out: TRUE if last user record */
+ rec_t* rec) /* in: record */
+{
+ ut_ad(rec);
+
+ if (rec == page_get_supremum_rec(buf_frame_align(rec))) {
+
+ return(FALSE);
+ }
+
+ if (page_rec_get_next(rec)
+ == page_get_supremum_rec(buf_frame_align(rec))) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/*****************************************************************
+Compares a data tuple to a physical record. Differs from the function
+cmp_dtuple_rec_with_match in the way that the record must reside on an
+index page, and also page infimum and supremum records can be given in
+the parameter rec. These are considered as the negative infinity and
+the positive infinity in the alphabetical order. */
+UNIV_INLINE
+int
+page_cmp_dtuple_rec_with_match(
+/*===========================*/
+ /* out: 1, 0, -1, if dtuple is greater, equal,
+ less than rec, respectively, when only the
+ common first fields are compared */
+ dtuple_t* dtuple, /* in: data tuple */
+ rec_t* rec, /* in: physical record on a page; may also
+ be page infimum or supremum, in which case
+ matched-parameter values below are not
+ affected */
+ ulint* matched_fields, /* in/out: number of already completely
+ matched fields; when function returns
+ contains the value for current comparison */
+ ulint* matched_bytes) /* in/out: number of already matched
+ bytes within the first field not completely
+ matched; when function returns contains the
+ value for current comparison */
+{
+ page_t* page;
+
+ ut_ad(dtuple_check_typed(dtuple));
+
+ page = buf_frame_align(rec);
+
+ if (rec == page_get_infimum_rec(page)) {
+ return(1);
+ } else if (rec == page_get_supremum_rec(page)) {
+ return(-1);
+ } else {
+ return(cmp_dtuple_rec_with_match(dtuple, rec,
+ matched_fields,
+ matched_bytes));
+ }
+}
+
+/*****************************************************************
+Gets the number of user records on page (infimum and supremum records
+are not user records). */
+UNIV_INLINE
+ulint
+page_get_n_recs(
+/*============*/
+ /* out: number of user records */
+ page_t* page) /* in: index page */
+{
+ return(page_header_get_field(page, PAGE_N_RECS));
+}
+
+/*****************************************************************
+Gets the number of dir slots in directory. */
+UNIV_INLINE
+ulint
+page_dir_get_n_slots(
+/*=================*/
+ /* out: number of slots */
+ page_t* page) /* in: index page */
+{
+ return(page_header_get_field(page, PAGE_N_DIR_SLOTS));
+}
+
+/*****************************************************************
+Gets pointer to nth directory slot. */
+UNIV_INLINE
+page_dir_slot_t*
+page_dir_get_nth_slot(
+/*==================*/
+ /* out: pointer to dir slot */
+ page_t* page, /* in: index page */
+ ulint n) /* in: position */
+{
+ ut_ad(page_header_get_field(page, PAGE_N_DIR_SLOTS) > n);
+
+ return(page + UNIV_PAGE_SIZE - PAGE_DIR
+ - (n + 1) * PAGE_DIR_SLOT_SIZE);
+}
+
+/******************************************************************
+Used to check the consistency of a record on a page. */
+UNIV_INLINE
+ibool
+page_rec_check(
+/*===========*/
+ /* out: TRUE if succeed */
+ rec_t* rec) /* in: record */
+{
+ page_t* page;
+
+ ut_a(rec);
+
+ page = buf_frame_align(rec);
+
+ ut_a(rec <= page_header_get_ptr(page, PAGE_HEAP_TOP));
+ ut_a(rec >= page + PAGE_DATA);
+
+ return(TRUE);
+}
+
+/******************************************************************
+Used to check the consistency of a directory slot. */
+UNIV_INLINE
+ibool
+page_dir_slot_check(
+/*================*/
+ /* out: TRUE if succeed */
+ page_dir_slot_t* slot) /* in: slot */
+{
+ page_t* page;
+ ulint n_slots;
+ ulint n_owned;
+
+ ut_a(slot);
+
+ page = buf_frame_align(slot);
+
+ n_slots = page_header_get_field(page, PAGE_N_DIR_SLOTS);
+
+ ut_a(slot <= page_dir_get_nth_slot(page, 0));
+ ut_a(slot >= page_dir_get_nth_slot(page, n_slots - 1));
+
+ ut_a(page_rec_check(page + mach_read_from_2(slot)));
+
+ n_owned = rec_get_n_owned(page + mach_read_from_2(slot));
+
+ if (slot == page_dir_get_nth_slot(page, 0)) {
+ ut_a(n_owned == 1);
+ } else if (slot == page_dir_get_nth_slot(page, n_slots - 1)) {
+ ut_a(n_owned >= 1);
+ ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED);
+ } else {
+ ut_a(n_owned >= PAGE_DIR_SLOT_MIN_N_OWNED);
+ ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED);
+ }
+
+ return(TRUE);
+}
+
+/*******************************************************************
+Gets the record pointed to by a directory slot. */
+UNIV_INLINE
+rec_t*
+page_dir_slot_get_rec(
+/*==================*/
+ /* out: pointer to record */
+ page_dir_slot_t* slot) /* in: directory slot */
+{
+ return(buf_frame_align(slot) + mach_read_from_2(slot));
+}
+
+/*******************************************************************
+This is used to set the record offset in a directory slot. */
+UNIV_INLINE
+void
+page_dir_slot_set_rec(
+/*==================*/
+ page_dir_slot_t* slot, /* in: directory slot */
+ rec_t* rec) /* in: record on the page */
+{
+ ut_ad(page_rec_check(rec));
+
+ mach_write_to_2(slot, rec - buf_frame_align(rec));
+}
+
+/*******************************************************************
+Gets the number of records owned by a directory slot. */
+UNIV_INLINE
+ulint
+page_dir_slot_get_n_owned(
+/*======================*/
+ /* out: number of records */
+ page_dir_slot_t* slot) /* in: page directory slot */
+{
+ return(rec_get_n_owned(page_dir_slot_get_rec(slot)));
+}
+
+/*******************************************************************
+This is used to set the owned records field of a directory slot. */
+UNIV_INLINE
+void
+page_dir_slot_set_n_owned(
+/*======================*/
+ page_dir_slot_t* slot, /* in: directory slot */
+ ulint n) /* in: number of records owned
+ by the slot */
+{
+ rec_set_n_owned(page_dir_slot_get_rec(slot), n);
+}
+
+/****************************************************************
+Calculates the space reserved for directory slots of a given number of
+records. The exact value is the fraction n * PAGE_DIR_SLOT_SIZE /
+PAGE_DIR_SLOT_MIN_N_OWNED, rounded upwards to an integer. */
+UNIV_INLINE
+ulint
+page_dir_calc_reserved_space(
+/*=========================*/
+ ulint n_recs) /* in: number of records */
+{
+ return((PAGE_DIR_SLOT_SIZE * n_recs + PAGE_DIR_SLOT_MIN_N_OWNED - 1)
+ / PAGE_DIR_SLOT_MIN_N_OWNED);
+}
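
A concrete illustration of the rounding in page_dir_calc_reserved_space (an
editorial sketch, not part of the original patch): assuming the values 2 for
PAGE_DIR_SLOT_SIZE and 4 for PAGE_DIR_SLOT_MIN_N_OWNED, reserving space for
9 records needs exactly 9 * 2 / 4 = 4.5 bytes, which the formula rounds up
to 5.

#include <assert.h>

#define SLOT_SIZE	2	/* assumed value of PAGE_DIR_SLOT_SIZE */
#define SLOT_MIN_OWNED	4	/* assumed value of PAGE_DIR_SLOT_MIN_N_OWNED */

int
main(void)
{
	unsigned long	n_recs = 9;

	/* (2 * 9 + 4 - 1) / 4 = 21 / 4 = 5 in integer arithmetic */
	assert((SLOT_SIZE * n_recs + SLOT_MIN_OWNED - 1) / SLOT_MIN_OWNED == 5);

	return(0);
}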
+
+/****************************************************************
+Gets the pointer to the next record on the page. */
+UNIV_INLINE
+rec_t*
+page_rec_get_next(
+/*==============*/
+ /* out: pointer to next record */
+ rec_t* rec) /* in: pointer to record */
+{
+ ulint offs;
+ page_t* page;
+
+ ut_ad(page_rec_check(rec));
+
+ page = buf_frame_align(rec);
+
+ offs = rec_get_next_offs(rec);
+
+ if (offs == 0) {
+
+ return(NULL);
+ }
+
+ return(page + offs);
+}
+
+/*******************************************************************
+Looks for the directory slot which owns the given record. */
+UNIV_INLINE
+ulint
+page_dir_find_owner_slot(
+/*=====================*/
+ /* out: the directory slot number */
+ rec_t* rec) /* in: the physical record */
+{
+ ulint i;
+ page_t* page;
+ page_dir_slot_t* slot;
+
+ ut_ad(page_rec_check(rec));
+
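+	/* Only the last record in a directory slot's group of records has a
+	nonzero n_owned field; first advance along the list to that record */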
+ while (rec_get_n_owned(rec) == 0) {
+ rec = page_rec_get_next(rec);
+ }
+
+ page = buf_frame_align(rec);
+
+ i = page_dir_get_n_slots(page) - 1;
+ slot = page_dir_get_nth_slot(page, i);
+
+ while (page_dir_slot_get_rec(slot) != rec) {
+ i--;
+ slot = page_dir_get_nth_slot(page, i);
+ }
+
+ return(i);
+}
+
+/****************************************************************
+Sets the pointer to the next record on the page. */
+UNIV_INLINE
+void
+page_rec_set_next(
+/*==============*/
+ rec_t* rec, /* in: pointer to record, must not be page supremum */
+ rec_t* next) /* in: pointer to next record, must not be page
+ infimum */
+{
+ page_t* page;
+
+ ut_ad(page_rec_check(rec));
+ ut_ad((next == NULL)
+ || (buf_frame_align(rec) == buf_frame_align(next)));
+
+ page = buf_frame_align(rec);
+
+ ut_ad(rec != page_get_supremum_rec(page));
+ ut_ad(next != page_get_infimum_rec(page));
+
+ if (next == NULL) {
+ rec_set_next_offs(rec, 0);
+ } else {
+ rec_set_next_offs(rec, (ulint)(next - page));
+ }
+}
+
+/****************************************************************
+Gets the pointer to the previous record. */
+UNIV_INLINE
+rec_t*
+page_rec_get_prev(
+/*==============*/
+ /* out: pointer to previous record */
+ rec_t* rec) /* in: pointer to record, must not be page
+ infimum */
+{
+ page_dir_slot_t* slot;
+ ulint slot_no;
+ rec_t* rec2;
+ rec_t* prev_rec = NULL;
+ page_t* page;
+
+ ut_ad(page_rec_check(rec));
+
+ page = buf_frame_align(rec);
+
+ ut_ad(rec != page_get_infimum_rec(page));
+
+ slot_no = page_dir_find_owner_slot(rec);
+
+ ut_ad(slot_no != 0);
+
+ slot = page_dir_get_nth_slot(page, slot_no - 1);
+
+ rec2 = page_dir_slot_get_rec(slot);
+
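+	/* rec2 is the record owned by the previous directory slot; every
+	record from rec2 onward precedes rec in the singly linked record
+	list, so walk forward until the record just before rec is found */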
+ while (rec != rec2) {
+ prev_rec = rec2;
+ rec2 = page_rec_get_next(rec2);
+ }
+
+ ut_ad(prev_rec);
+
+ return(prev_rec);
+}
+
+/*******************************************************************
+Looks for the record which owns the given record. */
+UNIV_INLINE
+rec_t*
+page_rec_find_owner_rec(
+/*====================*/
+ /* out: the owner record */
+ rec_t* rec) /* in: the physical record */
+{
+ ut_ad(page_rec_check(rec));
+
+ while (rec_get_n_owned(rec) == 0) {
+ rec = page_rec_get_next(rec);
+ }
+
+ return(rec);
+}
+
+/****************************************************************
+Returns the sum of the sizes of the records in the record list, excluding
+the infimum and supremum records. */
+UNIV_INLINE
+ulint
+page_get_data_size(
+/*===============*/
+ /* out: data in bytes */
+ page_t* page) /* in: index page */
+{
+ ulint ret;
+
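+	/* The record data fills the heap from the end of the fixed infimum
+	and supremum records up to the heap top, minus the bytes in deleted
+	(garbage) records */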
+ ret = (ulint)(page_header_get_field(page, PAGE_HEAP_TOP)
+ - PAGE_SUPREMUM_END
+ - page_header_get_field(page, PAGE_GARBAGE));
+
+ ut_ad(ret < UNIV_PAGE_SIZE);
+
+ return(ret);
+}
+
+/*****************************************************************
+Calculates free space if a page is emptied. */
+UNIV_INLINE
+ulint
+page_get_free_space_of_empty(void)
+/*==============================*/
+ /* out: free space */
+{
+ return((ulint)(UNIV_PAGE_SIZE
+ - PAGE_SUPREMUM_END
+ - PAGE_DIR
+ - 2 * PAGE_DIR_SLOT_SIZE));
+}
+
+/****************************************************************
+Each user record on a page, and also each deleted user record in the heap,
+takes its own size plus a fraction, dir slot size / PAGE_DIR_SLOT_MIN_N_OWNED,
+bytes of directory space. If the sum of these exceeds the value of
+page_get_free_space_of_empty, the insert is impossible; otherwise it is
+allowed. This function returns the maximum combined size of records
+which can be inserted on top of the record heap. */
+UNIV_INLINE
+ulint
+page_get_max_insert_size(
+/*=====================*/
+ /* out: maximum combined size for inserted records */
+ page_t* page, /* in: index page */
+ ulint n_recs) /* in: number of records */
+{
+ ulint occupied;
+ ulint free_space;
+
+ occupied = page_header_get_field(page, PAGE_HEAP_TOP)
+ - PAGE_SUPREMUM_END
+ + page_dir_calc_reserved_space(
+ n_recs + (page_header_get_field(page, PAGE_N_HEAP) - 2));
+
+ free_space = page_get_free_space_of_empty();
+
+ /* Above the 'n_recs +' part reserves directory space for the new
+ inserted records; the '- 2' excludes page infimum and supremum
+ records */
+
+ if (occupied > free_space) {
+
+ return(0);
+ }
+
+ return(free_space - occupied);
+}
+
+/****************************************************************
+Returns the maximum combined size of records which can be inserted on top
+of the record heap if a page is first reorganized. */
+UNIV_INLINE
+ulint
+page_get_max_insert_size_after_reorganize(
+/*======================================*/
+ /* out: maximum combined size for inserted records */
+ page_t* page, /* in: index page */
+ ulint n_recs) /* in: number of records */
+{
+ ulint occupied;
+ ulint free_space;
+
+ occupied = page_get_data_size(page)
+ + page_dir_calc_reserved_space(n_recs + page_get_n_recs(page));
+
+ free_space = page_get_free_space_of_empty();
+
+ if (occupied > free_space) {
+
+ return(0);
+ }
+
+ return(free_space - occupied);
+}
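
The following is a hypothetical helper (an editorial sketch, not part of the
original patch; the function name and the decision logic are illustrative
assumptions) showing how a caller might combine the two functions above to
decide whether an insert of rec_size bytes fits on the page directly, fits
only after the page is reorganized, or requires a page split.

/* Sketch: classify the outcome of a prospective insert of rec_size bytes */
static ibool
page_insert_would_fit(
/*==================*/
				/* out: TRUE if the record fits, possibly
				after reorganization */
	page_t*	page,		/* in: index page */
	ulint	rec_size,	/* in: size of the record to insert */
	ibool*	reorganize)	/* out: TRUE if the page must be reorganized
				before the insert */
{
	*reorganize = FALSE;

	if (page_get_max_insert_size(page, 1) >= rec_size) {

		return(TRUE);
	}

	if (page_get_max_insert_size_after_reorganize(page, 1) >= rec_size) {

		*reorganize = TRUE;

		return(TRUE);
	}

	/* does not fit even after reorganization: a page split is needed */

	return(FALSE);
}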
+
+/****************************************************************
+Puts a record to the free list. */
+UNIV_INLINE
+void
+page_mem_free(
+/*==========*/
+ page_t* page, /* in: index page */
+ rec_t* rec) /* in: pointer to the (origin of) record */
+{
+ rec_t* free;
+ ulint garbage;
+
+ free = page_header_get_ptr(page, PAGE_FREE);
+
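+	/* Link the freed record to the head of the free record list of the
+	page; its size is added to the garbage byte count below */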
+ page_rec_set_next(rec, free);
+ page_header_set_ptr(page, PAGE_FREE, rec);
+
+ garbage = page_header_get_field(page, PAGE_GARBAGE);
+
+ page_header_set_field(page, PAGE_GARBAGE,
+ garbage + rec_get_size(rec));
+}
+
+#ifdef UNIV_MATERIALIZE
+#undef UNIV_INLINE
+#define UNIV_INLINE UNIV_INLINE_ORIGINAL
+#endif
diff --git a/innobase/include/page0types.h b/innobase/include/page0types.h
new file mode 100644
index 00000000000..f149aad5b98
--- /dev/null
+++ b/innobase/include/page0types.h
@@ -0,0 +1,20 @@
+/******************************************************
+Index page routines
+
+(c) 1994-1996 Innobase Oy
+
+Created 2/2/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef page0types_h
+#define page0types_h
+
+#include "univ.i"
+
+/* Type of the index page */
+typedef byte page_t;
+typedef struct page_search_struct page_search_t;
+typedef struct page_cur_struct page_cur_t;
+
+
+#endif
diff --git a/innobase/include/pars0grm.h b/innobase/include/pars0grm.h
new file mode 100644
index 00000000000..d0b4b4c2e42
--- /dev/null
+++ b/innobase/include/pars0grm.h
@@ -0,0 +1,90 @@
+#ifndef YYSTYPE
+#define YYSTYPE int
+#endif
+#define PARS_INT_LIT 258
+#define PARS_FLOAT_LIT 259
+#define PARS_STR_LIT 260
+#define PARS_NULL_LIT 261
+#define PARS_ID_TOKEN 262
+#define PARS_AND_TOKEN 263
+#define PARS_OR_TOKEN 264
+#define PARS_NOT_TOKEN 265
+#define PARS_GE_TOKEN 266
+#define PARS_LE_TOKEN 267
+#define PARS_NE_TOKEN 268
+#define PARS_PROCEDURE_TOKEN 269
+#define PARS_IN_TOKEN 270
+#define PARS_OUT_TOKEN 271
+#define PARS_INT_TOKEN 272
+#define PARS_INTEGER_TOKEN 273
+#define PARS_FLOAT_TOKEN 274
+#define PARS_CHAR_TOKEN 275
+#define PARS_IS_TOKEN 276
+#define PARS_BEGIN_TOKEN 277
+#define PARS_END_TOKEN 278
+#define PARS_IF_TOKEN 279
+#define PARS_THEN_TOKEN 280
+#define PARS_ELSE_TOKEN 281
+#define PARS_ELSIF_TOKEN 282
+#define PARS_LOOP_TOKEN 283
+#define PARS_WHILE_TOKEN 284
+#define PARS_RETURN_TOKEN 285
+#define PARS_SELECT_TOKEN 286
+#define PARS_SUM_TOKEN 287
+#define PARS_COUNT_TOKEN 288
+#define PARS_DISTINCT_TOKEN 289
+#define PARS_FROM_TOKEN 290
+#define PARS_WHERE_TOKEN 291
+#define PARS_FOR_TOKEN 292
+#define PARS_DDOT_TOKEN 293
+#define PARS_CONSISTENT_TOKEN 294
+#define PARS_READ_TOKEN 295
+#define PARS_ORDER_TOKEN 296
+#define PARS_BY_TOKEN 297
+#define PARS_ASC_TOKEN 298
+#define PARS_DESC_TOKEN 299
+#define PARS_INSERT_TOKEN 300
+#define PARS_INTO_TOKEN 301
+#define PARS_VALUES_TOKEN 302
+#define PARS_UPDATE_TOKEN 303
+#define PARS_SET_TOKEN 304
+#define PARS_DELETE_TOKEN 305
+#define PARS_CURRENT_TOKEN 306
+#define PARS_OF_TOKEN 307
+#define PARS_CREATE_TOKEN 308
+#define PARS_TABLE_TOKEN 309
+#define PARS_INDEX_TOKEN 310
+#define PARS_UNIQUE_TOKEN 311
+#define PARS_CLUSTERED_TOKEN 312
+#define PARS_DOES_NOT_FIT_IN_MEM_TOKEN 313
+#define PARS_ON_TOKEN 314
+#define PARS_ASSIGN_TOKEN 315
+#define PARS_DECLARE_TOKEN 316
+#define PARS_CURSOR_TOKEN 317
+#define PARS_SQL_TOKEN 318
+#define PARS_OPEN_TOKEN 319
+#define PARS_FETCH_TOKEN 320
+#define PARS_CLOSE_TOKEN 321
+#define PARS_NOTFOUND_TOKEN 322
+#define PARS_TO_CHAR_TOKEN 323
+#define PARS_TO_NUMBER_TOKEN 324
+#define PARS_TO_BINARY_TOKEN 325
+#define PARS_BINARY_TO_NUMBER_TOKEN 326
+#define PARS_SUBSTR_TOKEN 327
+#define PARS_REPLSTR_TOKEN 328
+#define PARS_CONCAT_TOKEN 329
+#define PARS_INSTR_TOKEN 330
+#define PARS_LENGTH_TOKEN 331
+#define PARS_SYSDATE_TOKEN 332
+#define PARS_PRINTF_TOKEN 333
+#define PARS_ASSERT_TOKEN 334
+#define PARS_RND_TOKEN 335
+#define PARS_RND_STR_TOKEN 336
+#define PARS_ROW_PRINTF_TOKEN 337
+#define PARS_COMMIT_TOKEN 338
+#define PARS_ROLLBACK_TOKEN 339
+#define PARS_WORK_TOKEN 340
+#define NEG 341
+
+
+extern YYSTYPE yylval;
diff --git a/innobase/include/pars0opt.h b/innobase/include/pars0opt.h
new file mode 100644
index 00000000000..d091c3ee2d0
--- /dev/null
+++ b/innobase/include/pars0opt.h
@@ -0,0 +1,58 @@
+/******************************************************
+Simple SQL optimizer
+
+(c) 1997 Innobase Oy
+
+Created 12/21/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef pars0opt_h
+#define pars0opt_h
+
+#include "univ.i"
+#include "que0types.h"
+#include "usr0types.h"
+#include "pars0sym.h"
+#include "dict0types.h"
+#include "row0sel.h"
+
+/***********************************************************************
+Optimizes a select. Decides which indexes of the tables to use. The tables
+are accessed in the order in which they were written in the FROM part of the
+select statement. */
+
+void
+opt_search_plan(
+/*============*/
+ sel_node_t* sel_node); /* in: parsed select node */
+/***********************************************************************
+Looks for occurrences of the columns of the table in the query subgraph and
+adds them to the list of columns if an occurrence of the same column does not
+already exist in the list. If the column is already in the list, puts a value
+indirection to point to the occurrence in the column list, except if the
+column occurrence we are looking at is in the column list, in which case
+nothing is done. */
+
+void
+opt_find_all_cols(
+/*==============*/
+ ibool copy_val, /* in: if TRUE, new found columns are
+ added as columns to copy */
+ dict_index_t* index, /* in: index to use */
+ sym_node_list_t* col_list, /* in: base node of a list where
+ to add new found columns */
+ plan_t* plan, /* in: plan or NULL */
+ que_node_t* exp); /* in: expression or condition */
+/************************************************************************
+Prints info of a query plan. */
+
+void
+opt_print_query_plan(
+/*=================*/
+ sel_node_t* sel_node); /* in: select node */
+
+#ifndef UNIV_NONINL
+#include "pars0opt.ic"
+#endif
+
+#endif
diff --git a/innobase/include/pars0opt.ic b/innobase/include/pars0opt.ic
new file mode 100644
index 00000000000..0bfa8526bee
--- /dev/null
+++ b/innobase/include/pars0opt.ic
@@ -0,0 +1,7 @@
+/******************************************************
+Simple SQL optimizer
+
+(c) 1997 Innobase Oy
+
+Created 12/21/1997 Heikki Tuuri
+*******************************************************/
diff --git a/innobase/include/pars0pars.h b/innobase/include/pars0pars.h
new file mode 100644
index 00000000000..e08b071e246
--- /dev/null
+++ b/innobase/include/pars0pars.h
@@ -0,0 +1,566 @@
+/******************************************************
+SQL parser
+
+(c) 1996 Innobase Oy
+
+Created 11/19/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef pars0pars_h
+#define pars0pars_h
+
+#include "univ.i"
+#include "que0types.h"
+#include "usr0types.h"
+#include "pars0types.h"
+#include "row0types.h"
+#include "trx0types.h"
+
+extern int yydebug;
+
+/* If the following is set TRUE, the lexer will print the SQL string
+as it tokenizes it */
+
+extern ibool pars_print_lexed;
+
+/* Global variable used while parsing a single procedure or query: the code is
+NOT re-entrant */
+extern sym_tab_t* pars_sym_tab_global;
+
+extern pars_res_word_t pars_to_char_token;
+extern pars_res_word_t pars_to_number_token;
+extern pars_res_word_t pars_to_binary_token;
+extern pars_res_word_t pars_binary_to_number_token;
+extern pars_res_word_t pars_substr_token;
+extern pars_res_word_t pars_replstr_token;
+extern pars_res_word_t pars_concat_token;
+extern pars_res_word_t pars_length_token;
+extern pars_res_word_t pars_instr_token;
+extern pars_res_word_t pars_sysdate_token;
+extern pars_res_word_t pars_printf_token;
+extern pars_res_word_t pars_assert_token;
+extern pars_res_word_t pars_rnd_token;
+extern pars_res_word_t pars_rnd_str_token;
+extern pars_res_word_t pars_count_token;
+extern pars_res_word_t pars_sum_token;
+extern pars_res_word_t pars_distinct_token;
+extern pars_res_word_t pars_int_token;
+extern pars_res_word_t pars_char_token;
+extern pars_res_word_t pars_float_token;
+extern pars_res_word_t pars_update_token;
+extern pars_res_word_t pars_asc_token;
+extern pars_res_word_t pars_desc_token;
+extern pars_res_word_t pars_open_token;
+extern pars_res_word_t pars_close_token;
+extern pars_res_word_t pars_consistent_token;
+extern pars_res_word_t pars_unique_token;
+extern pars_res_word_t pars_clustered_token;
+
+extern ulint pars_star_denoter;
+
+/* Procedure parameter types */
+#define PARS_INPUT 0
+#define PARS_OUTPUT 1
+#define PARS_NOT_PARAM 2
+
+int
+yyparse(void);
+
+/*****************************************************************
+Parses an SQL string returning the query graph. */
+
+que_t*
+pars_sql(
+/*=====*/
+ /* out, own: the query graph */
+ char* str); /* in: SQL string */
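
A minimal usage sketch of this entry point (editorial, not part of the
original patch; the procedure text is a hypothetical example of the internal
SQL dialect accepted by this parser):

/* Sketch: parse a trivial internal SQL procedure into a query graph */
static que_t*
pars_example_graph(void)
{
	return(pars_sql("PROCEDURE P () IS\nBEGIN\nCOMMIT WORK;\nEND;"));
}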
+/*****************************************************************
+Retrieves characters for the lexical analyzer. */
+
+void
+pars_get_lex_chars(
+/*===============*/
+ char* buf, /* in/out: buffer where to copy */
+ int* result, /* out: number of characters copied or EOF */
+ int max_size); /* in: maximum number of characters which fit
+ in the buffer */
+/*****************************************************************
+Instructs the lexical analyzer to stop when it receives the EOF integer. */
+
+int
+yywrap(void);
+/*========*/
+ /* out: returns TRUE */
+/*****************************************************************
+Called by yyparse on error. */
+
+void
+yyerror(
+/*====*/
+ char* s); /* in: error message string */
+/*************************************************************************
+Parses a variable declaration. */
+
+sym_node_t*
+pars_variable_declaration(
+/*======================*/
+ /* out, own: symbol table node of type
+ SYM_VAR */
+ sym_node_t* node, /* in: symbol table node allocated for the
+ id of the variable */
+ pars_res_word_t* type); /* in: pointer to a type token */
+/*************************************************************************
+Parses a function expression. */
+
+func_node_t*
+pars_func(
+/*======*/
+ /* out, own: function node in a query tree */
+ que_node_t* res_word,/* in: function name reserved word */
+ que_node_t* arg); /* in: first argument in the argument list */
+/*************************************************************************
+Parses an operator expression. */
+
+func_node_t*
+pars_op(
+/*====*/
+ /* out, own: function node in a query tree */
+ int func, /* in: operator token code */
+ que_node_t* arg1, /* in: first argument */
+ que_node_t* arg2); /* in: second argument or NULL for a unary
+ operator */
+/*************************************************************************
+Parses an ORDER BY clause. Order by a single column only is supported. */
+
+order_node_t*
+pars_order_by(
+/*==========*/
+ /* out, own: order-by node in a query tree */
+ sym_node_t* column, /* in: column name */
+ pars_res_word_t* asc); /* in: &pars_asc_token or &pars_desc_token */
+/*************************************************************************
+Parses a select list; creates a query graph node for the whole SELECT
+statement. */
+
+sel_node_t*
+pars_select_list(
+/*=============*/
+ /* out, own: select node in a query
+ tree */
+ que_node_t* select_list, /* in: select list */
+ sym_node_t* into_list); /* in: variables list or NULL */
+/*************************************************************************
+Parses a cursor declaration. */
+
+que_node_t*
+pars_cursor_declaration(
+/*====================*/
+ /* out: sym_node */
+ sym_node_t* sym_node, /* in: cursor id node in the symbol
+ table */
+ sel_node_t* select_node); /* in: select node */
+/*************************************************************************
+Parses a select statement. */
+
+sel_node_t*
+pars_select_statement(
+/*==================*/
+ /* out, own: select node in a query
+ tree */
+ sel_node_t* select_node, /* in: select node already containing
+ the select list */
+ sym_node_t* table_list, /* in: table list */
+ que_node_t* search_cond, /* in: search condition or NULL */
+ pars_res_word_t* for_update, /* in: NULL or &pars_update_token */
+ pars_res_word_t* consistent_read,/* in: NULL or
+ &pars_consistent_token */
+ order_node_t* order_by); /* in: NULL or an order-by node */
+/*************************************************************************
+Parses a column assignment in an update. */
+
+col_assign_node_t*
+pars_column_assignment(
+/*===================*/
+ /* out: column assignment node */
+ sym_node_t* column, /* in: column to assign */
+ que_node_t* exp); /* in: value to assign */
+/*************************************************************************
+Parses a delete or update statement start. */
+
+upd_node_t*
+pars_update_statement_start(
+/*========================*/
+ /* out, own: update node in a query
+ tree */
+ ibool is_delete, /* in: TRUE if delete */
+ sym_node_t* table_sym, /* in: table name node */
+ col_assign_node_t* col_assign_list);/* in: column assignment list, NULL
+ if delete */
+/*************************************************************************
+Parses an update or delete statement. */
+
+upd_node_t*
+pars_update_statement(
+/*==================*/
+ /* out, own: update node in a query
+ tree */
+ upd_node_t* node, /* in: update node */
+ sym_node_t* cursor_sym, /* in: pointer to a cursor entry in
+ the symbol table or NULL */
+ que_node_t* search_cond); /* in: search condition or NULL */
+/*************************************************************************
+Parses an insert statement. */
+
+ins_node_t*
+pars_insert_statement(
+/*==================*/
+ /* out, own: insert node in a query
+ tree */
+ sym_node_t* table_sym, /* in: table name node */
+ que_node_t* values_list, /* in: value expression list or NULL */
+ sel_node_t* select); /* in: select condition or NULL */
+/*************************************************************************
+Parses a procedure parameter declaration. */
+
+sym_node_t*
+pars_parameter_declaration(
+/*=======================*/
+ /* out, own: symbol table node of type
+ SYM_VAR */
+ sym_node_t* node, /* in: symbol table node allocated for the
+ id of the parameter */
+ ulint param_type,
+ /* in: PARS_INPUT or PARS_OUTPUT */
+ pars_res_word_t* type); /* in: pointer to a type token */
+/*************************************************************************
+Parses an elsif element. */
+
+elsif_node_t*
+pars_elsif_element(
+/*===============*/
+ /* out: elsif node */
+ que_node_t* cond, /* in: if-condition */
+ que_node_t* stat_list); /* in: statement list */
+/*************************************************************************
+Parses an if-statement. */
+
+if_node_t*
+pars_if_statement(
+/*==============*/
+ /* out: if-statement node */
+ que_node_t* cond, /* in: if-condition */
+ que_node_t* stat_list, /* in: statement list */
+ que_node_t* else_part); /* in: else-part statement list */
+/*************************************************************************
+Parses a for-loop-statement. */
+
+for_node_t*
+pars_for_statement(
+/*===============*/
+ /* out: for-statement node */
+ sym_node_t* loop_var, /* in: loop variable */
+ que_node_t* loop_start_limit,/* in: loop start expression */
+ que_node_t* loop_end_limit, /* in: loop end expression */
+ que_node_t* stat_list); /* in: statement list */
+/*************************************************************************
+Parses a while-statement. */
+
+while_node_t*
+pars_while_statement(
+/*=================*/
+ /* out: while-statement node */
+ que_node_t* cond, /* in: while-condition */
+ que_node_t* stat_list); /* in: statement list */
+/*************************************************************************
+Parses a return-statement. */
+
+return_node_t*
+pars_return_statement(void);
+/*=======================*/
+ /* out: return-statement node */
+/*************************************************************************
+Parses a procedure call. */
+
+func_node_t*
+pars_procedure_call(
+/*================*/
+ /* out: function node */
+ que_node_t* res_word,/* in: procedure name reserved word */
+ que_node_t* args); /* in: argument list */
+/*************************************************************************
+Parses an assignment statement. */
+
+assign_node_t*
+pars_assignment_statement(
+/*======================*/
+ /* out: assignment statement node */
+ sym_node_t* var, /* in: variable to assign */
+ que_node_t* val); /* in: value to assign */
+/*************************************************************************
+Parses a fetch statement. */
+
+fetch_node_t*
+pars_fetch_statement(
+/*=================*/
+ /* out: fetch statement node */
+ sym_node_t* cursor, /* in: cursor node */
+ sym_node_t* into_list); /* in: variables to set */
+/*************************************************************************
+Parses an open or close cursor statement. */
+
+open_node_t*
+pars_open_statement(
+/*================*/
+ /* out: open or close statement node */
+ ulint type, /* in: ROW_SEL_OPEN_CURSOR
+ or ROW_SEL_CLOSE_CURSOR */
+ sym_node_t* cursor); /* in: cursor node */
+/*************************************************************************
+Parses a row_printf-statement. */
+
+row_printf_node_t*
+pars_row_printf_statement(
+/*======================*/
+ /* out: row_printf-statement node */
+ sel_node_t* sel_node); /* in: select node */
+/*************************************************************************
+Parses a commit statement. */
+
+commit_node_t*
+pars_commit_statement(void);
+/*=======================*/
+/*************************************************************************
+Parses a rollback statement. */
+
+roll_node_t*
+pars_rollback_statement(void);
+/*=========================*/
+/*************************************************************************
+Parses a column definition at a table creation. */
+
+sym_node_t*
+pars_column_def(
+/*============*/
+ /* out: column sym table node */
+ sym_node_t* sym_node, /* in: column node in the symbol
+ table */
+ pars_res_word_t* type); /* in: data type */
+/*************************************************************************
+Parses a table creation operation. */
+
+tab_node_t*
+pars_create_table(
+/*==============*/
+ /* out: table create subgraph */
+ sym_node_t* table_sym, /* in: table name node in the symbol
+ table */
+ sym_node_t* column_defs, /* in: list of column names */
+ void* not_fit_in_memory);/* in: a non-NULL pointer means that
+ this is a table which in simulations
+ should be simulated as not fitting
+ in memory; the thread is put to sleep
+ to simulate disk accesses; NOTE that
+ this flag is not stored to the data
+ dictionary on disk, and the database
+ will forget about a non-NULL value if
+ it has to reload the table definition
+ from disk */
+/*************************************************************************
+Parses an index creation operation. */
+
+ind_node_t*
+pars_create_index(
+/*==============*/
+ /* out: index create subgraph */
+ pars_res_word_t* unique_def, /* in: not NULL if a unique index */
+ pars_res_word_t* clustered_def, /* in: not NULL if a clustered index */
+ sym_node_t* index_sym, /* in: index name node in the symbol
+ table */
+ sym_node_t* table_sym, /* in: table name node in the symbol
+ table */
+ sym_node_t* column_list); /* in: list of column names */
+/*************************************************************************
+Parses a procedure definition. */
+
+que_fork_t*
+pars_procedure_definition(
+/*======================*/
+ /* out: query fork node */
+ sym_node_t* sym_node, /* in: procedure id node in the symbol
+ table */
+ sym_node_t* param_list, /* in: parameter declaration list */
+ que_node_t* stat_list); /* in: statement list */
+/*****************************************************************
+Reads stored procedure input parameter values from a buffer. */
+
+void
+pars_proc_read_input_params_from_buf(
+/*=================================*/
+ que_t* graph, /* in: query graph which contains a stored procedure */
+ byte* buf); /* in: buffer */
+/*****************************************************************
+Writes stored procedure output parameter values to a buffer. */
+
+ulint
+pars_proc_write_output_params_to_buf(
+/*=================================*/
+ byte* buf, /* in: buffer which must be big enough */
+ que_t* graph); /* in: query graph which contains a stored procedure */
+/*****************************************************************
+Parses a stored procedure call when it is not within another stored
+procedure, that is, when the client issues the procedure call directly. */
+
+que_fork_t*
+pars_stored_procedure_call(
+/*=======================*/
+ /* out: query graph */
+ sym_node_t* sym_node); /* in: stored procedure name */
+/*****************************************************************
+Writes info about query parameter markers (denoted with '?' in ODBC) into a
+buffer. */
+
+ulint
+pars_write_query_param_info(
+/*========================*/
+ /* out: number of bytes used for info in buf */
+ byte* buf, /* in: buffer which must be big enough */
+ que_fork_t* graph); /* in: parsed query graph */
+/**********************************************************************
+Completes a query graph by adding query thread and fork nodes
+above it and prepares the graph for running. The fork created is of
+type QUE_FORK_MYSQL_INTERFACE. */
+
+que_thr_t*
+pars_complete_graph_for_exec(
+/*=========================*/
+ /* out: query thread node to run */
+ que_node_t* node, /* in: root node for an incomplete
+ query graph */
+ trx_t* trx, /* in: transaction handle */
+ mem_heap_t* heap); /* in: memory heap from which allocated */
+
+
+/* Struct used to denote a reserved word in a parsing tree */
+struct pars_res_word_struct{
+ ulint code; /* the token code for the reserved word from
+ pars0grm.h */
+};
+
+/* A predefined function or operator node in a parsing tree; this construct
+is also used for some non-functions like the assignment ':=' */
+struct func_node_struct{
+ que_common_t common; /* type: QUE_NODE_FUNC */
+ int func; /* token code of the function name */
+ ulint class; /* class of the function */
+ que_node_t* args; /* argument(s) of the function */
+ UT_LIST_NODE_T(func_node_t) cond_list;
+ /* list of comparison conditions; defined
+ only for comparison operator nodes except,
+ presently, for OPT_SCROLL_TYPE ones */
+ UT_LIST_NODE_T(func_node_t) func_node_list;
+ /* list of function nodes in a parsed
+ query graph */
+};
+
+/* An order-by node in a select */
+struct order_node_struct{
+ que_common_t common; /* type: QUE_NODE_ORDER */
+ sym_node_t* column; /* order-by column */
+ ibool asc; /* TRUE if ascending, FALSE if descending */
+};
+
+/* Procedure definition node */
+struct proc_node_struct{
+ que_common_t common; /* type: QUE_NODE_PROC */
+ sym_node_t* proc_id; /* procedure name symbol in the symbol
+ table of this same procedure */
+ sym_node_t* param_list; /* input and output parameters */
+ que_node_t* stat_list; /* statement list */
+ sym_tab_t* sym_tab; /* symbol table of this procedure */
+ dict_proc_t* dict_proc; /* stored procedure node in the
+ dictionary cache, if defined */
+};
+
+/* Stored procedure call node */
+struct call_node_struct{
+ que_common_t common; /* type: QUE_NODE_CALL */
+ sym_node_t* proc_name; /* stored procedure name */
+ dict_proc_t* procedure_def; /* pointer to a stored procedure graph
+ in the dictionary stored procedure
+ cache */
+ sym_tab_t* sym_tab; /* symbol table of this query */
+};
+
+/* elsif-element node */
+struct elsif_node_struct{
+ que_common_t common; /* type: QUE_NODE_ELSIF */
+ que_node_t* cond; /* if condition */
+ que_node_t* stat_list; /* statement list */
+};
+
+/* if-statement node */
+struct if_node_struct{
+ que_common_t common; /* type: QUE_NODE_IF */
+ que_node_t* cond; /* if condition */
+ que_node_t* stat_list; /* statement list */
+ que_node_t* else_part; /* else-part statement list */
+ elsif_node_t* elsif_list; /* elsif element list */
+};
+
+/* while-statement node */
+struct while_node_struct{
+ que_common_t common; /* type: QUE_NODE_WHILE */
+ que_node_t* cond; /* while condition */
+ que_node_t* stat_list; /* statement list */
+};
+
+/* for-loop-statement node */
+struct for_node_struct{
+ que_common_t common; /* type: QUE_NODE_FOR */
+ sym_node_t* loop_var; /* loop variable: this is the
+ dereferenced symbol from the
+ variable declarations, not the
+ symbol occurrence in the for loop
+ definition */
+ que_node_t* loop_start_limit;/* initial value of loop variable */
+ que_node_t* loop_end_limit; /* end value of loop variable */
+ int loop_end_value; /* evaluated value for the end value:
+ it is calculated only when the loop
+ is entered, and will not change within
+ the loop */
+ que_node_t* stat_list; /* statement list */
+};
+
+/* return-statement node */
+struct return_node_struct{
+ que_common_t common; /* type: QUE_NODE_RETURN */
+};
+
+/* Assignment statement node */
+struct assign_node_struct{
+ que_common_t common; /* type: QUE_NODE_ASSIGNMENT */
+ sym_node_t* var; /* variable to set */
+ que_node_t* val; /* value to assign */
+};
+
+/* Column assignment node */
+struct col_assign_node_struct{
+ que_common_t common; /* type: QUE_NODE_COL_ASSIGN */
+ sym_node_t* col; /* column to set */
+ que_node_t* val; /* value to assign */
+};
+
+/* Classes of functions */
+#define PARS_FUNC_ARITH 1 /* +, -, *, / */
+#define PARS_FUNC_LOGICAL 2
+#define PARS_FUNC_CMP 3
+#define PARS_FUNC_PREDEFINED 4 /* TO_NUMBER, SUBSTR, ... */
+#define PARS_FUNC_AGGREGATE 5 /* COUNT, DISTINCT, SUM */
+#define PARS_FUNC_OTHER 6 /* these are not real functions,
+ e.g., := */
+
+#ifndef UNIV_NONINL
+#include "pars0pars.ic"
+#endif
+
+#endif
diff --git a/innobase/include/pars0pars.ic b/innobase/include/pars0pars.ic
new file mode 100644
index 00000000000..155b6659ace
--- /dev/null
+++ b/innobase/include/pars0pars.ic
@@ -0,0 +1,7 @@
+/******************************************************
+SQL parser
+
+(c) 1996 Innobase Oy
+
+Created 11/19/1996 Heikki Tuuri
+*******************************************************/
diff --git a/innobase/include/pars0sym.h b/innobase/include/pars0sym.h
new file mode 100644
index 00000000000..9fdeb1984a9
--- /dev/null
+++ b/innobase/include/pars0sym.h
@@ -0,0 +1,191 @@
+/******************************************************
+SQL parser symbol table
+
+(c) 1997 Innobase Oy
+
+Created 12/15/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef pars0sym_h
+#define pars0sym_h
+
+#include "univ.i"
+#include "que0types.h"
+#include "usr0types.h"
+#include "dict0types.h"
+#include "pars0types.h"
+#include "row0types.h"
+
+/**********************************************************************
+Creates a symbol table for a single stored procedure or query. */
+
+sym_tab_t*
+sym_tab_create(
+/*===========*/
+ /* out, own: symbol table */
+ mem_heap_t* heap); /* in: memory heap where to create */
+/**********************************************************************
+Frees the memory allocated dynamically AFTER the parsing phase for variables
+etc. in the symbol table. Does not free the mem heap where the table was
+originally created. Also frees SQL explicit cursor definitions. */
+
+void
+sym_tab_free_private(
+/*=================*/
+ sym_tab_t* sym_tab); /* in, own: symbol table */
+/**********************************************************************
+Adds an integer literal to a symbol table. */
+
+sym_node_t*
+sym_tab_add_int_lit(
+/*================*/
+ /* out: symbol table node */
+ sym_tab_t* sym_tab, /* in: symbol table */
+ ulint val); /* in: integer value */
+/**********************************************************************
+Adds a string literal to a symbol table. */
+
+sym_node_t*
+sym_tab_add_str_lit(
+/*================*/
+ /* out: symbol table node */
+ sym_tab_t* sym_tab, /* in: symbol table */
+ byte* str, /* in: string with no quotes around
+ it */
+ ulint len); /* in: string length */
+/**********************************************************************
+Adds an SQL null literal to a symbol table. */
+
+sym_node_t*
+sym_tab_add_null_lit(
+/*=================*/
+ /* out: symbol table node */
+ sym_tab_t* sym_tab); /* in: symbol table */
+/**********************************************************************
+Adds an identifier to a symbol table. */
+
+sym_node_t*
+sym_tab_add_id(
+/*===========*/
+ /* out: symbol table node */
+ sym_tab_t* sym_tab, /* in: symbol table */
+ byte* name, /* in: identifier name */
+ ulint len); /* in: identifier length */
+
+#define SYM_CLUST_FIELD_NO 0
+#define SYM_SEC_FIELD_NO 1
+
+struct sym_node_struct{
+ que_common_t common; /* node type:
+ QUE_NODE_SYMBOL */
+ /* NOTE: if the data field in 'common.val' is not NULL and the symbol
+ table node is not for a temporary column, the memory for the value has
+ been allocated from dynamic memory and it should be freed when the
+ symbol table is discarded */
+
+ sym_node_t* indirection; /* pointer to
+ another symbol table
+ node which contains
+ the value for this
+ node, NULL otherwise */
+ sym_node_t* alias; /* pointer to
+ another symbol table
+ node for which this
+ node is an alias,
+ NULL otherwise */
+ UT_LIST_NODE_T(sym_node_t) col_var_list; /* list of table
+ columns or a list of
+ input variables for an
+ explicit cursor */
+ ibool copy_val; /* TRUE if a column
+ and its value should
+ be copied to dynamic
+ memory when fetched */
+ ulint field_nos[2]; /* if a column, in
+ the position
+ SYM_CLUST_FIELD_NO is
+ the field number in the
+ clustered index; in
+ the position
+ SYM_SEC_FIELD_NO
+ the field number in the
+ non-clustered index to
+ use first; if not found
+ from the index, then
+ ULINT_UNDEFINED */
+ ibool resolved; /* TRUE if the
+ meaning of a variable
+ or a column has been
+ resolved; for literals
+ this is always TRUE */
+ ulint token_type; /* SYM_VAR, SYM_COLUMN,
+ SYM_IMPLICIT_VAR,
+ SYM_LIT, SYM_TABLE,
+ SYM_CURSOR, ... */
+ char* name; /* name of an id */
+ ulint name_len; /* id name length */
+ dict_table_t* table; /* table definition
+ if a table id or a
+ column id */
+ dict_proc_t* procedure_def; /* stored procedure
+ definition, if a
+ stored procedure name */
+ ulint col_no; /* column number if a
+ column */
+ sel_buf_t* prefetch_buf; /* NULL, or a buffer
+ for cached column
+ values for prefetched
+ rows */
+ sel_node_t* cursor_def; /* cursor definition
+ select node if a
+ named cursor */
+ ulint param_type; /* PARS_INPUT,
+ PARS_OUTPUT, or
+ PARS_NOT_PARAM if not a
+ procedure parameter */
+ sym_tab_t* sym_table; /* back pointer to
+ the symbol table */
+ UT_LIST_NODE_T(sym_node_t) sym_list; /* list of symbol
+ nodes */
+};
+
+struct sym_tab_struct{
+ que_t* query_graph;
+ /* query graph generated by the
+ parser */
+ char* sql_string;
+ /* SQL string to parse */
+ int string_len;
+ /* SQL string length */
+ int next_char_pos;
+ /* position of the next character in
+ sql_string to give to the lexical
+ analyzer */
+ sym_node_list_t sym_list;
+ /* list of symbol nodes in the symbol
+ table */
+ UT_LIST_BASE_NODE_T(func_node_t)
+ func_node_list;
+ /* list of function nodes in the
+ parsed query graph */
+ mem_heap_t* heap; /* memory heap from which we can
+ allocate space */
+};
+
+/* Types of a symbol table entry */
+#define SYM_VAR 91 /* declared parameter or local
+ variable of a procedure */
+#define SYM_IMPLICIT_VAR 92 /* storage for an intermediate result
+ of a calculation */
+#define SYM_LIT 93 /* literal */
+#define SYM_TABLE 94 /* database table name */
+#define SYM_COLUMN 95 /* database table column */
+#define SYM_CURSOR 96 /* named cursor */
+#define SYM_PROCEDURE_NAME 97 /* stored procedure name */
+#define SYM_INDEX 98 /* database index name */
+
+#ifndef UNIV_NONINL
+#include "pars0sym.ic"
+#endif
+
+#endif
diff --git a/innobase/include/pars0sym.ic b/innobase/include/pars0sym.ic
new file mode 100644
index 00000000000..9508d423769
--- /dev/null
+++ b/innobase/include/pars0sym.ic
@@ -0,0 +1,7 @@
+/******************************************************
+SQL parser symbol table
+
+(c) 1997 Innobase Oy
+
+Created 12/15/1997 Heikki Tuuri
+*******************************************************/
diff --git a/innobase/include/pars0types.h b/innobase/include/pars0types.h
new file mode 100644
index 00000000000..e7471260501
--- /dev/null
+++ b/innobase/include/pars0types.h
@@ -0,0 +1,29 @@
+/******************************************************
+SQL parser global types
+
+(c) 1997 Innobase Oy
+
+Created 1/11/1998 Heikki Tuuri
+*******************************************************/
+
+#ifndef pars0types_h
+#define pars0types_h
+
+typedef struct sym_node_struct sym_node_t;
+typedef struct sym_tab_struct sym_tab_t;
+typedef struct pars_res_word_struct pars_res_word_t;
+typedef struct func_node_struct func_node_t;
+typedef struct order_node_struct order_node_t;
+typedef struct proc_node_struct proc_node_t;
+typedef struct call_node_struct call_node_t;
+typedef struct elsif_node_struct elsif_node_t;
+typedef struct if_node_struct if_node_t;
+typedef struct while_node_struct while_node_t;
+typedef struct for_node_struct for_node_t;
+typedef struct return_node_struct return_node_t;
+typedef struct assign_node_struct assign_node_t;
+typedef struct col_assign_node_struct col_assign_node_t;
+
+typedef UT_LIST_BASE_NODE_T(sym_node_t) sym_node_list_t;
+
+#endif
diff --git a/innobase/include/que0que.h b/innobase/include/que0que.h
new file mode 100644
index 00000000000..bd21a9801aa
--- /dev/null
+++ b/innobase/include/que0que.h
@@ -0,0 +1,495 @@
+/******************************************************
+Query graph
+
+(c) 1996 Innobase Oy
+
+Created 5/27/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef que0que_h
+#define que0que_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "dict0types.h"
+#include "trx0trx.h"
+#include "srv0srv.h"
+#include "usr0types.h"
+#include "que0types.h"
+#include "row0types.h"
+#include "pars0types.h"
+
+/* If the following flag is set TRUE, the module will print trace info
+of SQL execution in the UNIV_SQL_DEBUG version */
+extern ibool que_trace_on;
+
+/***************************************************************************
+Adds a query graph to the session's list of graphs. */
+
+void
+que_graph_publish(
+/*==============*/
+ que_t* graph, /* in: graph */
+ sess_t* sess); /* in: session */
+/***************************************************************************
+Creates a query graph fork node. */
+
+que_fork_t*
+que_fork_create(
+/*============*/
+ /* out, own: fork node */
+ que_t* graph, /* in: graph, if NULL then this
+ fork node is assumed to be the
+ graph root */
+ que_node_t* parent, /* in: parent node */
+ ulint fork_type, /* in: fork type */
+ mem_heap_t* heap); /* in: memory heap where created */
+/***************************************************************************
+Gets the first thr in a fork. */
+UNIV_INLINE
+que_thr_t*
+que_fork_get_first_thr(
+/*===================*/
+ que_fork_t* fork); /* in: query fork */
+/***************************************************************************
+Gets the child node of the first thr in a fork. */
+UNIV_INLINE
+que_node_t*
+que_fork_get_child(
+/*===============*/
+ que_fork_t* fork); /* in: query fork */
+/***************************************************************************
+Sets the parent of a graph node. */
+UNIV_INLINE
+void
+que_node_set_parent(
+/*================*/
+ que_node_t* node, /* in: graph node */
+ que_node_t* parent);/* in: parent */
+/***************************************************************************
+Creates a query graph thread node. */
+
+que_thr_t*
+que_thr_create(
+/*===========*/
+ /* out, own: query thread node */
+ que_fork_t* parent, /* in: parent node, i.e., a fork node */
+ mem_heap_t* heap); /* in: memory heap where created */
+/**************************************************************************
+Checks if the query graph is in a state where it should be freed, and
+frees it in that case. If the session is in a state where it should be
+closed, that is done as well. */
+
+ibool
+que_graph_try_free(
+/*===============*/
+ /* out: TRUE if freed */
+ que_t* graph); /* in: query graph */
+/**************************************************************************
+Frees a query graph, but not the heap where it was created. Does not free
+explicit cursor declarations; they are freed in que_graph_free. */
+
+void
+que_graph_free_recursive(
+/*=====================*/
+ que_node_t* node); /* in: query graph node */
+/**************************************************************************
+Frees a query graph. */
+
+void
+que_graph_free(
+/*===========*/
+ que_t* graph); /* in: query graph; we assume that the memory
+ heap where this graph was created is private
+ to this graph: if not, then use
+ que_graph_free_recursive and free the heap
+ afterwards! */
+/**************************************************************************
+Stops a query thread if graph or trx is in a state requiring it. The
+conditions are tested in the order (1) graph, (2) trx. The kernel mutex has
+to be reserved. */
+
+ibool
+que_thr_stop(
+/*=========*/
+ /* out: TRUE if stopped */
+ que_thr_t* thr); /* in: query thread */
+/**************************************************************************
+Moves a thread from another state to the QUE_THR_RUNNING state. Increments
+the n_active_thrs counters of the query graph and transaction if the thread
+was not already active. */
+UNIV_INLINE
+void
+que_thr_move_to_run_state_for_mysql(
+/*================================*/
+ que_thr_t* thr, /* in: a query thread */
+ trx_t* trx); /* in: transaction */
+/**************************************************************************
+A patch for MySQL used to 'stop' a dummy query thread used in MySQL
+select, when there is no error or lock wait. */
+UNIV_INLINE
+void
+que_thr_stop_for_mysql_no_error(
+/*============================*/
+ que_thr_t* thr, /* in: query thread */
+ trx_t* trx); /* in: transaction */
+/**************************************************************************
+A patch for MySQL used to 'stop' a dummy query thread used in MySQL
+select. */
+
+void
+que_thr_stop_for_mysql(
+/*===================*/
+ que_thr_t* thr); /* in: query thread */
+/**************************************************************************
+Runs query threads. Note that the individual query thread which is run
+within this function may change if, e.g., the OS thread executing this
+function uses a threshold amount of resources. */
+
+void
+que_run_threads(
+/*============*/
+ que_thr_t* thr); /* in: query thread which is run initially */
+/**************************************************************************
+After signal handling is finished, returns control to a query graph error
+handling routine. (Currently, just returns the control to the root of the
+graph so that the graph can communicate an error message to the client.) */
+
+void
+que_fork_error_handle(
+/*==================*/
+ trx_t* trx, /* in: trx */
+ que_t* fork); /* in: query graph which was run before signal
+ handling started, NULL not allowed */
+/**************************************************************************
+Handles an SQL error noticed during query thread execution. At the moment,
+does nothing! */
+
+void
+que_thr_handle_error(
+/*=================*/
+ que_thr_t* thr, /* in: query thread */
+ ulint err_no, /* in: error number */
+ byte* err_str,/* in, own: error string or NULL; NOTE: the
+ function will take care of freeing of the
+ string! */
+ ulint err_len);/* in: error string length */
+/**************************************************************************
+Moves a suspended query thread to the QUE_THR_RUNNING state and releases
+a single worker thread to execute it. This function should be used to end
+the wait state of a query thread waiting for a lock or a stored procedure
+completion. */
+
+void
+que_thr_end_wait(
+/*=============*/
+ que_thr_t* thr, /* in: query thread in the
+ QUE_THR_LOCK_WAIT,
+ or QUE_THR_PROCEDURE_WAIT, or
+ QUE_THR_SIG_REPLY_WAIT state */
+ que_thr_t** next_thr); /* in/out: next query thread to run;
+ if the value which is passed in is
+ a pointer to a NULL pointer, then the
+ calling function can start running
+ a new query thread */
+/**************************************************************************
+Same as que_thr_end_wait, but no parameter next_thr available. */
+
+void
+que_thr_end_wait_no_next_thr(
+/*=========================*/
+ que_thr_t* thr); /* in: query thread in the
+ QUE_THR_LOCK_WAIT,
+ or QUE_THR_PROCEDURE_WAIT, or
+ QUE_THR_SIG_REPLY_WAIT state */
+/**************************************************************************
+Starts execution of a command in a query fork. Picks a query thread which
+is not in the QUE_THR_RUNNING state and moves it to that state. If none
+can be chosen, a situation which may arise in parallelized fetches, NULL
+is returned. */
+
+que_thr_t*
+que_fork_start_command(
+/*===================*/
+ /* out: a query thread of the graph moved to
+ QUE_THR_RUNNING state, or NULL; the query
+ thread should be executed by que_run_threads
+ by the caller */
+ que_fork_t* fork, /* in: a query fork */
+ ulint command,/* in: command SESS_COMM_FETCH_NEXT, ... */
+ ulint param); /* in: possible parameter to the command */
+/***************************************************************************
+Gets the trx of a query thread. */
+UNIV_INLINE
+trx_t*
+thr_get_trx(
+/*========*/
+ que_thr_t* thr); /* in: query thread */
+/***************************************************************************
+Gets the type of a graph node. */
+UNIV_INLINE
+ulint
+que_node_get_type(
+/*==============*/
+ que_node_t* node); /* in: graph node */
+/***************************************************************************
+Gets pointer to the value data type field of a graph node. */
+UNIV_INLINE
+dtype_t*
+que_node_get_data_type(
+/*===================*/
+ que_node_t* node); /* in: graph node */
+/***************************************************************************
+Gets pointer to the value dfield of a graph node. */
+UNIV_INLINE
+dfield_t*
+que_node_get_val(
+/*=============*/
+ que_node_t* node); /* in: graph node */
+/***************************************************************************
+Gets the value buffer size of a graph node. */
+UNIV_INLINE
+ulint
+que_node_get_val_buf_size(
+/*======================*/
+ /* out: val buffer size, not defined if
+ val.data == NULL in node */
+ que_node_t* node); /* in: graph node */
+/***************************************************************************
+Sets the value buffer size of a graph node. */
+UNIV_INLINE
+void
+que_node_set_val_buf_size(
+/*======================*/
+ que_node_t* node, /* in: graph node */
+ ulint size); /* in: size */
+/*************************************************************************
+Gets the next list node in a list of query graph nodes. */
+UNIV_INLINE
+que_node_t*
+que_node_get_next(
+/*==============*/
+ que_node_t* node); /* in: node in a list */
+/*************************************************************************
+Gets the parent node of a query graph node. */
+UNIV_INLINE
+que_node_t*
+que_node_get_parent(
+/*================*/
+ /* out: parent node or NULL */
+ que_node_t* node); /* in: node */
+/*************************************************************************
+Catenates a query graph node to a list of them (possibly an empty list). */
+UNIV_INLINE
+que_node_t*
+que_node_list_add_last(
+/*===================*/
+ /* out: one-way list of nodes */
+ que_node_t* node_list, /* in: node list, or NULL */
+ que_node_t* node); /* in: node */
+/*************************************************************************
+Gets a query graph node list length. */
+UNIV_INLINE
+ulint
+que_node_list_get_len(
+/*==================*/
+ /* out: length, for NULL list 0 */
+ que_node_t* node_list); /* in: node list, or NULL */
+/**************************************************************************
+Checks if graph, trx, or session is in a state where the query thread should
+be stopped. */
+UNIV_INLINE
+ibool
+que_thr_peek_stop(
+/*==============*/
+ /* out: TRUE if should be stopped; NOTE that
+ if the peek is made without reserving the
+ kernel mutex, then another peek with the
+ mutex reserved is necessary before deciding
+ the actual stopping */
+ que_thr_t* thr); /* in: query thread */
+/***************************************************************************
+Returns TRUE if the query graph is for a SELECT statement. */
+UNIV_INLINE
+ibool
+que_graph_is_select(
+/*================*/
+ /* out: TRUE if a select */
+ que_t* graph); /* in: graph */
+/**************************************************************************
+Prints info of an SQL query graph node. */
+
+void
+que_node_print_info(
+/*================*/
+ que_node_t* node); /* in: query graph node */
+
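+/* A minimal usage sketch, not part of the original interface: a caller that
+has obtained a query graph fork would typically start a command on it and
+then execute the returned query thread, as the comment on
+que_fork_start_command above describes. Kernel mutex handling and error
+checking are omitted, and SESS_COMM_FETCH_NEXT is just one possible
+command. */
+#if 0
+static void
+que_fork_usage_example(que_fork_t* fork)
+{
+	que_thr_t*	thr;
+
+	thr = que_fork_start_command(fork, SESS_COMM_FETCH_NEXT, 0);
+
+	if (thr != NULL) {
+		que_run_threads(thr);
+	}
+}
+#endif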
+
+/* Query graph query thread node: the fields are protected by the kernel
+mutex with the exceptions named below */
+
+struct que_thr_struct{
+ que_common_t common; /* type: QUE_NODE_THR */
+ que_node_t* child; /* graph child node */
+ que_t* graph; /* graph where this node belongs */
+ ibool is_active; /* TRUE if the thread has been set
+ to the run state in
+ que_thr_move_to_run_state, but not
+ deactivated in
+ que_thr_dec_reference_count */
+ ulint state; /* state of the query thread */
+ UT_LIST_NODE_T(que_thr_t)
+ thrs; /* list of thread nodes of the fork
+ node */
+ UT_LIST_NODE_T(que_thr_t)
+ trx_thrs; /* lists of threads in wait list of
+ the trx */
+ UT_LIST_NODE_T(que_thr_t)
+ queue; /* list of runnable thread nodes in
+ the server task queue */
+ /*------------------------------*/
+ /* The following fields are private to the OS thread executing the
+ query thread, and are not protected by the kernel mutex: */
+
+ que_node_t* run_node; /* pointer to the node where the
+ subgraph down from this node is
+ currently executed */
+ que_node_t* prev_node; /* pointer to the node from which
+ the control came */
+ ulint resource; /* resource usage of the query thread
+ thus far */
+};
+
+/* Query graph fork node: its fields are protected by the kernel mutex */
+struct que_fork_struct{
+ que_common_t common; /* type: QUE_NODE_FORK */
+ que_t* graph; /* query graph of this node */
+ ulint fork_type; /* fork type */
+ ulint n_active_thrs; /* if this is the root of a graph, the
+ number of query threads that have been
+ started in que_thr_move_to_run_state
+ but for which que_thr_dec_refer_count
+ has not yet been called */
+ trx_t* trx; /* transaction: this is set only in
+ the root node */
+ ulint state; /* state of the fork node */
+ que_thr_t* caller; /* pointer to a possible calling query
+ thread */
+ UT_LIST_BASE_NODE_T(que_thr_t)
+ thrs; /* list of query threads */
+ /*------------------------------*/
+ /* The fields in this section are defined only in the root node */
+ sym_tab_t* sym_tab; /* symbol table of the query,
+ generated by the parser, or NULL
+ if the graph was created 'by hand' */
+ ulint id; /* id of this query graph */
+ ulint command; /* command currently executed in the
+ graph */
+ ulint param; /* possible command parameter */
+
+ /* The following cur_... fields are relevant only in a select graph */
+
+ ulint cur_end; /* QUE_CUR_NOT_DEFINED, QUE_CUR_START,
+ QUE_CUR_END */
+ ulint cur_pos; /* if there are n rows in the result
+ set, values 0 and n + 1 mean before
+ first row, or after last row, depending
+ on cur_end; values 1...n mean a row
+ index */
+ ibool cur_on_row; /* TRUE if cursor is on a row, i.e.,
+ it is not before the first row or
+ after the last row */
+ dulint n_inserts; /* number of rows inserted */
+ dulint n_updates; /* number of rows updated */
+ dulint n_deletes; /* number of rows deleted */
+ sel_node_t* last_sel_node; /* last executed select node, or NULL
+ if none */
+ UT_LIST_NODE_T(que_fork_t)
+ graphs; /* list of query graphs of a session
+ or a stored procedure */
+ /*------------------------------*/
+ mem_heap_t* heap; /* memory heap where the fork was
+ created */
+
+};
+
+/* Query fork (or graph) types */
+#define QUE_FORK_SELECT_NON_SCROLL 1 /* forward-only cursor */
+#define QUE_FORK_SELECT_SCROLL 2 /* scrollable cursor */
+#define QUE_FORK_INSERT 3
+#define QUE_FORK_UPDATE 4
+#define QUE_FORK_ROLLBACK 5
+ /* This is really the undo graph used in rollback,
+ no signal-sending roll_node in this graph */
+#define QUE_FORK_PURGE 6
+#define QUE_FORK_EXECUTE 7
+#define QUE_FORK_PROCEDURE 8
+#define QUE_FORK_PROCEDURE_CALL 9
+#define QUE_FORK_MYSQL_INTERFACE 10
+#define QUE_FORK_RECOVERY 11
+
+/* Query fork (or graph) states */
+#define QUE_FORK_ACTIVE 1
+#define QUE_FORK_COMMAND_WAIT 2
+#define QUE_FORK_INVALID 3
+#define QUE_FORK_BEING_FREED 4
+
+/* Flag which is ORed to control structure statement node types */
+#define QUE_NODE_CONTROL_STAT 1024
+
+/* Query graph node types */
+#define QUE_NODE_LOCK 1
+#define QUE_NODE_INSERT 2
+#define QUE_NODE_UPDATE 4
+#define QUE_NODE_CURSOR 5
+#define QUE_NODE_SELECT 6
+#define QUE_NODE_AGGREGATE 7
+#define QUE_NODE_FORK 8
+#define QUE_NODE_THR 9
+#define QUE_NODE_UNDO 10
+#define QUE_NODE_COMMIT 11
+#define QUE_NODE_ROLLBACK 12
+#define QUE_NODE_PURGE 13
+#define QUE_NODE_CREATE_TABLE 14
+#define QUE_NODE_CREATE_INDEX 15
+#define QUE_NODE_SYMBOL 16
+#define QUE_NODE_RES_WORD 17
+#define QUE_NODE_FUNC 18
+#define QUE_NODE_ORDER 19
+#define QUE_NODE_PROC (20 + QUE_NODE_CONTROL_STAT)
+#define QUE_NODE_IF (21 + QUE_NODE_CONTROL_STAT)
+#define QUE_NODE_WHILE (22 + QUE_NODE_CONTROL_STAT)
+#define QUE_NODE_ASSIGNMENT 23
+#define QUE_NODE_FETCH 24
+#define QUE_NODE_OPEN 25
+#define QUE_NODE_COL_ASSIGNMENT 26
+#define QUE_NODE_FOR (27 + QUE_NODE_CONTROL_STAT)
+#define QUE_NODE_RETURN 28
+#define QUE_NODE_ROW_PRINTF 29
+#define QUE_NODE_ELSIF 30
+#define QUE_NODE_CALL 31
+
+/* Query thread states */
+#define QUE_THR_RUNNING 1
+#define QUE_THR_PROCEDURE_WAIT 2
+#define QUE_THR_COMPLETED 3 /* in selects this means that the
+ thread is at the end of its result set
+ (or start, in case of a scroll cursor);
+ in other statements, this means the
+ thread has done its task */
+#define QUE_THR_COMMAND_WAIT 4
+#define QUE_THR_LOCK_WAIT 5
+#define QUE_THR_SIG_REPLY_WAIT 6
+#define QUE_THR_SUSPENDED 7
+#define QUE_THR_ERROR 8
+
+/* From where the cursor position is counted */
+#define QUE_CUR_NOT_DEFINED 1
+#define QUE_CUR_START 2
+#define QUE_CUR_END 3
+
+
+#ifndef UNIV_NONINL
+#include "que0que.ic"
+#endif
+
+#endif
diff --git a/innobase/include/que0que.ic b/innobase/include/que0que.ic
new file mode 100644
index 00000000000..e19198aad0e
--- /dev/null
+++ b/innobase/include/que0que.ic
@@ -0,0 +1,304 @@
+/******************************************************
+Query graph
+
+(c) 1996 Innobase Oy
+
+Created 5/27/1996 Heikki Tuuri
+*******************************************************/
+
+#include "usr0sess.h"
+
+/***************************************************************************
+Gets the trx of a query thread. */
+UNIV_INLINE
+trx_t*
+thr_get_trx(
+/*========*/
+ que_thr_t* thr) /* in: query thread */
+{
+ ut_ad(thr);
+
+ return(thr->graph->trx);
+}
+
+/***************************************************************************
+Gets the first thr in a fork. */
+UNIV_INLINE
+que_thr_t*
+que_fork_get_first_thr(
+/*===================*/
+ que_fork_t* fork) /* in: query fork */
+{
+ return(UT_LIST_GET_FIRST(fork->thrs));
+}
+
+/***************************************************************************
+Gets the child node of the first thr in a fork. */
+UNIV_INLINE
+que_node_t*
+que_fork_get_child(
+/*===============*/
+ que_fork_t* fork) /* in: query fork */
+{
+ que_thr_t* thr;
+
+ thr = UT_LIST_GET_FIRST(fork->thrs);
+
+ return(thr->child);
+}
+
+/***************************************************************************
+Gets the type of a graph node. */
+UNIV_INLINE
+ulint
+que_node_get_type(
+/*==============*/
+ que_node_t* node) /* in: graph node */
+{
+ ut_ad(node);
+
+ return(((que_common_t*)node)->type);
+}
+
+/***************************************************************************
+Gets pointer to the value dfield of a graph node. */
+UNIV_INLINE
+dfield_t*
+que_node_get_val(
+/*=============*/
+ que_node_t* node) /* in: graph node */
+{
+ ut_ad(node);
+
+ return(&(((que_common_t*)node)->val));
+}
+
+/***************************************************************************
+Gets the value buffer size of a graph node. */
+UNIV_INLINE
+ulint
+que_node_get_val_buf_size(
+/*======================*/
+ /* out: val buffer size, not defined if
+ val.data == NULL in node */
+ que_node_t* node) /* in: graph node */
+{
+ ut_ad(node);
+
+ return(((que_common_t*)node)->val_buf_size);
+}
+
+/***************************************************************************
+Sets the value buffer size of a graph node. */
+UNIV_INLINE
+void
+que_node_set_val_buf_size(
+/*======================*/
+ que_node_t* node, /* in: graph node */
+ ulint size) /* in: size */
+{
+ ut_ad(node);
+
+ ((que_common_t*)node)->val_buf_size = size;
+}
+
+/***************************************************************************
+Sets the parent of a graph node. */
+UNIV_INLINE
+void
+que_node_set_parent(
+/*================*/
+ que_node_t* node, /* in: graph node */
+ que_node_t* parent) /* in: parent */
+{
+ ut_ad(node);
+
+ ((que_common_t*)node)->parent = parent;
+}
+
+/***************************************************************************
+Gets pointer to the value data type field of a graph node. */
+UNIV_INLINE
+dtype_t*
+que_node_get_data_type(
+/*===================*/
+ que_node_t* node) /* in: graph node */
+{
+ ut_ad(node);
+
+ return(&(((que_common_t*)node)->val.type));
+}
+
+/*************************************************************************
+Catenates a query graph node to a list of them (possibly an empty list). */
+UNIV_INLINE
+que_node_t*
+que_node_list_add_last(
+/*===================*/
+ /* out: one-way list of nodes */
+ que_node_t* node_list, /* in: node list, or NULL */
+ que_node_t* node) /* in: node */
+{
+ que_common_t* cnode;
+ que_common_t* cnode2;
+
+ cnode = node;
+
+ cnode->brother = NULL;
+
+ if (node_list == NULL) {
+
+ return(node);
+ }
+
+ cnode2 = node_list;
+
+ while (cnode2->brother != NULL) {
+ cnode2 = cnode2->brother;
+ }
+
+ cnode2->brother = node;
+
+ return(node_list);
+}
+
+/*************************************************************************
+Gets the next list node in a list of query graph nodes. */
+UNIV_INLINE
+que_node_t*
+que_node_get_next(
+/*==============*/
+ /* out: next node in a list of nodes */
+ que_node_t* node) /* in: node in a list */
+{
+ return(((que_common_t*)node)->brother);
+}
+
+/*************************************************************************
+Gets a query graph node list length. */
+UNIV_INLINE
+ulint
+que_node_list_get_len(
+/*==================*/
+ /* out: length, for NULL list 0 */
+ que_node_t* node_list) /* in: node list, or NULL */
+{
+ que_common_t* cnode;
+ ulint len;
+
+ cnode = node_list;
+ len = 0;
+
+ while (cnode != NULL) {
+ len++;
+ cnode = cnode->brother;
+ }
+
+ return(len);
+}
+
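+/* A minimal usage sketch, not part of the original file: builds a two-node
+list with the functions defined above; node1 and node2 stand for any query
+graph nodes. */
+#if 0
+static ulint
+que_node_list_example(que_node_t* node1, que_node_t* node2)
+{
+	que_node_t*	list;
+
+	list = que_node_list_add_last(NULL, node1);
+	list = que_node_list_add_last(list, node2);
+
+	ut_ad(que_node_get_next(node1) == node2);
+
+	return(que_node_list_get_len(list));	/* == 2 */
+}
+#endif
+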
+/*************************************************************************
+Gets the parent node of a query graph node. */
+UNIV_INLINE
+que_node_t*
+que_node_get_parent(
+/*================*/
+ /* out: parent node or NULL */
+ que_node_t* node) /* in: node */
+{
+ return(((que_common_t*)node)->parent);
+}
+
+/**************************************************************************
+Checks if graph, trx, or session is in a state where the query thread should
+be stopped. */
+UNIV_INLINE
+ibool
+que_thr_peek_stop(
+/*==============*/
+ /* out: TRUE if should be stopped; NOTE that
+ if the peek is made without reserving the
+ kernel mutex, then another peek with the
+ mutex reserved is necessary before deciding
+ the actual stopping */
+ que_thr_t* thr) /* in: query thread */
+{
+ trx_t* trx;
+ que_t* graph;
+
+ graph = thr->graph;
+ trx = graph->trx;
+
+ if (graph->state != QUE_FORK_ACTIVE
+ || trx->que_state == TRX_QUE_LOCK_WAIT
+ || (UT_LIST_GET_LEN(trx->signals) > 0
+ && trx->que_state == TRX_QUE_RUNNING)) {
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/***************************************************************************
+Returns TRUE if the query graph is for a SELECT statement. */
+UNIV_INLINE
+ibool
+que_graph_is_select(
+/*================*/
+ /* out: TRUE if a select */
+ que_t* graph) /* in: graph */
+{
+ if (graph->fork_type == QUE_FORK_SELECT_SCROLL
+ || graph->fork_type == QUE_FORK_SELECT_NON_SCROLL) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/**************************************************************************
+Moves a thread from another state to the QUE_THR_RUNNING state. Increments
+the n_active_thrs counters of the query graph and transaction if thr was
+not active. */
+UNIV_INLINE
+void
+que_thr_move_to_run_state_for_mysql(
+/*================================*/
+ que_thr_t* thr, /* in: a query thread */
+ trx_t* trx) /* in: transaction */
+{
+ if (!thr->is_active) {
+
+ (thr->graph)->n_active_thrs++;
+
+ trx->n_active_thrs++;
+
+ thr->is_active = TRUE;
+
+ ut_ad((thr->graph)->n_active_thrs == 1);
+ ut_ad(trx->n_active_thrs == 1);
+ }
+
+ thr->state = QUE_THR_RUNNING;
+}
+
+/**************************************************************************
+A patch for MySQL used to 'stop' a dummy query thread used in MySQL
+select, when there is no error or lock wait. */
+UNIV_INLINE
+void
+que_thr_stop_for_mysql_no_error(
+/*============================*/
+ que_thr_t* thr, /* in: query thread */
+ trx_t* trx) /* in: transaction */
+{
+ ut_ad(thr->state == QUE_THR_RUNNING);
+
+ thr->state = QUE_THR_COMPLETED;
+
+ thr->is_active = FALSE;
+ (thr->graph)->n_active_thrs--;
+
+ trx->n_active_thrs--;
+}
diff --git a/innobase/include/que0types.h b/innobase/include/que0types.h
new file mode 100644
index 00000000000..c7ce09db40b
--- /dev/null
+++ b/innobase/include/que0types.h
@@ -0,0 +1,42 @@
+/******************************************************
+Query graph global types
+
+(c) 1996 Innobase Oy
+
+Created 5/27/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef que0types_h
+#define que0types_h
+
+#include "data0data.h"
+#include "dict0types.h"
+
+/* Pseudotype for all graph nodes */
+typedef void que_node_t;
+
+typedef struct que_fork_struct que_fork_t;
+
+/* Query graph root is a fork node */
+typedef que_fork_t que_t;
+
+typedef struct que_thr_struct que_thr_t;
+typedef struct que_common_struct que_common_t;
+
+/* Common struct at the beginning of each query graph node; the name of this
+substruct must be 'common' */
+
+struct que_common_struct{
+ ulint type; /* query node type */
+ que_node_t* parent; /* back pointer to parent node, or NULL */
+ que_node_t* brother;/* pointer to a possible brother node */
+ dfield_t val; /* evaluated value for an expression */
+ ulint val_buf_size;
+ /* buffer size for the evaluated value data,
+ if the buffer has been allocated dynamically:
+ if this field is != 0, and the node is a
+ symbol node or a function node, then we
+ have to free the data field in val explicitly */
+};
+
+#endif
diff --git a/innobase/include/read0read.h b/innobase/include/read0read.h
new file mode 100644
index 00000000000..dea952c8547
--- /dev/null
+++ b/innobase/include/read0read.h
@@ -0,0 +1,92 @@
+/******************************************************
+Cursor read
+
+(c) 1997 Innobase Oy
+
+Created 2/16/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef read0read_h
+#define read0read_h
+
+#include "univ.i"
+
+
+#include "ut0byte.h"
+#include "ut0lst.h"
+#include "trx0trx.h"
+#include "read0types.h"
+
+/*************************************************************************
+Opens a read view where exactly the transactions serialized before this
+point in time are seen in the view. */
+
+read_view_t*
+read_view_open_now(
+/*===============*/
+ /* out, own: read view struct */
+ trx_t* cr_trx, /* in: creating transaction, or NULL */
+ mem_heap_t* heap); /* in: memory heap from which allocated */
+/*************************************************************************
+Makes a copy of the oldest existing read view, or opens a new one. The view
+must be closed with ..._close. */
+
+read_view_t*
+read_view_oldest_copy_or_open_new(
+/*==============================*/
+ /* out, own: read view struct */
+ trx_t* cr_trx, /* in: creating transaction, or NULL */
+ mem_heap_t* heap); /* in: memory heap from which allocated */
+/*************************************************************************
+Closes a read view. */
+
+void
+read_view_close(
+/*============*/
+ read_view_t* view); /* in: read view */
+/*************************************************************************
+Checks if a read view sees the specified transaction. */
+UNIV_INLINE
+ibool
+read_view_sees_trx_id(
+/*==================*/
+ /* out: TRUE if sees */
+ read_view_t* view, /* in: read view */
+ dulint trx_id); /* in: trx id */
+
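+/* A minimal usage sketch, not part of the original interface: opens a read
+view, tests the visibility of a transaction id and closes the view again.
+Kernel mutex requirements are not shown, error handling is omitted, and
+mem_heap_create()/mem_heap_free() are assumed from mem0mem.h. */
+#if 0
+static ibool
+read_view_usage_example(trx_t* trx, dulint some_trx_id)
+{
+	mem_heap_t*	heap	= mem_heap_create(256);
+	read_view_t*	view	= read_view_open_now(trx, heap);
+	ibool		sees	= read_view_sees_trx_id(view, some_trx_id);
+
+	read_view_close(view);
+	mem_heap_free(heap);
+
+	return(sees);
+}
+#endif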
+
+/* A read view lists the trx ids of those transactions for which a consistent
+read should not see the modifications to the database. */
+
+struct read_view_struct{
+ ibool can_be_too_old; /* TRUE if the system has had to purge old
+ versions which this read view should be able
+ to access: the read view can bump into the
+ DB_MISSING_HISTORY error */
+ dulint low_limit_no; /* The view does not need to see the undo
+ logs for transactions whose transaction number
+ is strictly smaller (<) than this value: they
+ can be removed in purge if not needed by other
+ views */
+ dulint low_limit_id; /* The read should not see any transaction
+ with trx id >= this value */
+ dulint up_limit_id; /* The read should see all trx ids which
+ are strictly smaller (<) than this value */
+ ulint n_trx_ids; /* Number of cells in the trx_ids array */
+ dulint* trx_ids; /* Additional trx ids which the read should
+ not see: typically, these are the active
+ transactions at the time when the read is
+ serialized, except the reading transaction
+ itself; the trx ids in this array are in a
+ descending order */
+ trx_t* creator; /* Pointer to the creating transaction, or
+ NULL if used in purge */
+ UT_LIST_NODE_T(read_view_t) view_list;
+ /* List of read views in trx_sys */
+};
+
+#ifndef UNIV_NONINL
+#include "read0read.ic"
+#endif
+
+#endif
diff --git a/innobase/include/read0read.ic b/innobase/include/read0read.ic
new file mode 100644
index 00000000000..03d84ee0c51
--- /dev/null
+++ b/innobase/include/read0read.ic
@@ -0,0 +1,85 @@
+/******************************************************
+Cursor read
+
+(c) 1997 Innobase Oy
+
+Created 2/16/1997 Heikki Tuuri
+*******************************************************/
+
+/*************************************************************************
+Gets the nth trx id in a read view. */
+UNIV_INLINE
+dulint
+read_view_get_nth_trx_id(
+/*=====================*/
+ /* out: trx id */
+ read_view_t* view, /* in: read view */
+ ulint n) /* in: position */
+{
+ ut_ad(n < view->n_trx_ids);
+
+ return(*(view->trx_ids + n));
+}
+
+/*************************************************************************
+Sets the nth trx id in a read view. */
+UNIV_INLINE
+void
+read_view_set_nth_trx_id(
+/*=====================*/
+ read_view_t* view, /* in: read view */
+ ulint n, /* in: position */
+ dulint trx_id) /* in: trx id to set */
+{
+ ut_ad(n < view->n_trx_ids);
+
+ *(view->trx_ids + n) = trx_id;
+}
+
+/*************************************************************************
+Checks if a read view sees the specified transaction. */
+UNIV_INLINE
+ibool
+read_view_sees_trx_id(
+/*==================*/
+ /* out: TRUE if sees */
+ read_view_t* view, /* in: read view */
+ dulint trx_id) /* in: trx id */
+{
+ ulint n_ids;
+ int cmp;
+ ulint i;
+
+ if (ut_dulint_cmp(trx_id, view->up_limit_id) < 0) {
+
+ return(TRUE);
+ }
+
+ if (ut_dulint_cmp(trx_id, view->low_limit_id) >= 0) {
+
+ return(FALSE);
+ }
+
+ /* We go through the trx ids in the array smallest first: this order
+ may save CPU time, because if there was a very long running
+ transaction in the trx id array, its trx id is looked at first, and
+ the first two comparisons may well decide the visibility of trx_id. */
+
+ n_ids = view->n_trx_ids;
+
+ for (i = 0; i < n_ids; i++) {
+
+ cmp = ut_dulint_cmp(trx_id,
+ read_view_get_nth_trx_id(view, n_ids - i - 1));
+ if (0 == cmp) {
+
+ return(FALSE);
+
+ } else if (cmp < 0) {
+
+ return(TRUE);
+ }
+ }
+
+ return(TRUE);
+}
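+
+/* Worked example (illustration only, not from the original file): suppose
+up_limit_id == 100, low_limit_id == 200 and trx_ids == {180, 150, 120},
+stored in descending order. Then trx id 90 is seen (smaller than
+up_limit_id), 250 is not seen (>= low_limit_id), 150 is not seen (found in
+the array), and 130 is seen (between the limits but not in the array). */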
diff --git a/innobase/include/read0types.h b/innobase/include/read0types.h
new file mode 100644
index 00000000000..5eb3e533f89
--- /dev/null
+++ b/innobase/include/read0types.h
@@ -0,0 +1,14 @@
+/******************************************************
+Cursor read
+
+(c) 1997 Innobase Oy
+
+Created 2/16/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef read0types_h
+#define read0types_h
+
+typedef struct read_view_struct read_view_t;
+
+#endif
diff --git a/innobase/include/rem0cmp.h b/innobase/include/rem0cmp.h
new file mode 100644
index 00000000000..77b9ef9edc8
--- /dev/null
+++ b/innobase/include/rem0cmp.h
@@ -0,0 +1,130 @@
+/***********************************************************************
+Comparison services for records
+
+(c) 1994-1996 Innobase Oy
+
+Created 7/1/1994 Heikki Tuuri
+************************************************************************/
+
+#ifndef rem0cmp_h
+#define rem0cmp_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "data0type.h"
+#include "dict0dict.h"
+#include "rem0rec.h"
+
+/*****************************************************************
+This function is used to compare two dfields where at least the first
+has its data type field set. */
+UNIV_INLINE
+int
+cmp_dfield_dfield(
+/*==============*/
+ /* out: 1, 0, -1, if dfield1 is greater, equal,
+ less than dfield2, respectively */
+ dfield_t* dfield1,/* in: data field; must have type field set */
+ dfield_t* dfield2);/* in: data field */
+/*****************************************************************
+This function is used to compare a data tuple to a physical record.
+Only the first dtuple->n_fields_cmp fields are taken into account for
+the data tuple! If we denote by n = n_fields_cmp, then rec must
+have either m >= n fields, or it must differ from dtuple in some of
+the m fields rec has. */
+
+int
+cmp_dtuple_rec_with_match(
+/*======================*/
+ /* out: 1, 0, -1, if dtuple is greater, equal,
+ less than rec, respectively, when only the
+ common first fields are compared */
+ dtuple_t* dtuple, /* in: data tuple */
+ rec_t* rec, /* in: physical record which differs from
+ dtuple in some of the common fields, or which
+ has an equal number or more fields than
+ dtuple */
+ ulint* matched_fields, /* in/out: number of already completely
+ matched fields; when function returns,
+ contains the value for current comparison */
+ ulint* matched_bytes); /* in/out: number of already matched
+ bytes within the first field not completely
+ matched; when function returns, contains the
+ value for current comparison */
+/******************************************************************
+Compares a data tuple to a physical record. */
+
+int
+cmp_dtuple_rec(
+/*===========*/
+ /* out: 1, 0, -1, if dtuple is greater, equal,
+ less than rec, respectively; see the comments
+ for cmp_dtuple_rec_with_match */
+ dtuple_t* dtuple, /* in: data tuple */
+ rec_t* rec); /* in: physical record */
+/******************************************************************
+Checks if a dtuple is a prefix of a record. The last field in dtuple
+is allowed to be a prefix of the corresponding field in the record. */
+
+ibool
+cmp_dtuple_is_prefix_of_rec(
+/*========================*/
+ /* out: TRUE if prefix */
+ dtuple_t* dtuple, /* in: data tuple */
+ rec_t* rec); /* in: physical record */
+/******************************************************************
+Compares a prefix of a data tuple to a prefix of a physical record for
+equality. If there are fewer fields in rec than parameter n_fields, FALSE
+is returned. NOTE that n_fields_cmp of dtuple does not affect this
+comparison. */
+
+ibool
+cmp_dtuple_rec_prefix_equal(
+/*========================*/
+ /* out: TRUE if equal */
+ dtuple_t* dtuple, /* in: data tuple */
+ rec_t* rec, /* in: physical record */
+ ulint n_fields); /* in: number of fields which should be
+ compared; must not exceed the number of
+ fields in dtuple */
+/*****************************************************************
+This function is used to compare two physical records. Only the common
+first fields are compared. */
+
+int
+cmp_rec_rec_with_match(
+/*===================*/
+ /* out: 1, 0 , -1 if rec1 is greater, equal,
+ less, respectively, than rec2; only the common
+ first fields are compared */
+ rec_t* rec1, /* in: physical record */
+ rec_t* rec2, /* in: physical record */
+ dict_index_t* index, /* in: data dictionary index */
+ ulint* matched_fields, /* in/out: number of already completely
+ matched fields; when the function returns,
+ contains the value for the current
+ comparison */
+ ulint* matched_bytes);/* in/out: number of already matched
+ bytes within the first field not completely
+ matched; when the function returns, contains
+ the value for the current comparison */
+/*****************************************************************
+This function is used to compare two physical records. Only the common
+first fields are compared. */
+UNIV_INLINE
+int
+cmp_rec_rec(
+/*========*/
+ /* out: 1, 0 , -1 if rec1 is greater, equal,
+ less, respectively, than rec2; only the common
+ first fields are compared */
+ rec_t* rec1, /* in: physical record */
+ rec_t* rec2, /* in: physical record */
+ dict_index_t* index); /* in: data dictionary index */
+
+
+#ifndef UNIV_NONINL
+#include "rem0cmp.ic"
+#endif
+
+#endif
diff --git a/innobase/include/rem0cmp.ic b/innobase/include/rem0cmp.ic
new file mode 100644
index 00000000000..ebf513f538c
--- /dev/null
+++ b/innobase/include/rem0cmp.ic
@@ -0,0 +1,84 @@
+/***********************************************************************
+Comparison services for records
+
+(c) 1994-1996 Innobase Oy
+
+Created 7/1/1994 Heikki Tuuri
+************************************************************************/
+
+/*****************************************************************
+This function is used to compare two data fields for which we know the
+data type. */
+
+int
+cmp_data_data_slow(
+/*===============*/
+ /* out: 1, 0, -1, if data1 is greater, equal,
+ less than data2, respectively */
+ dtype_t* cur_type,/* in: data type of the fields */
+ byte* data1, /* in: data field (== a pointer to a memory
+ buffer) */
+ ulint len1, /* in: data field length or UNIV_SQL_NULL */
+ byte* data2, /* in: data field (== a pointer to a memory
+ buffer) */
+ ulint len2); /* in: data field length or UNIV_SQL_NULL */
+
+
+/*****************************************************************
+This function is used to compare two data fields for which we know the
+data type. */
+UNIV_INLINE
+int
+cmp_data_data(
+/*==========*/
+ /* out: 1, 0, -1, if data1 is greater, equal,
+ less than data2, respectively */
+ dtype_t* cur_type,/* in: data type of the fields */
+ byte* data1, /* in: data field (== a pointer to a memory
+ buffer) */
+ ulint len1, /* in: data field length or UNIV_SQL_NULL */
+ byte* data2, /* in: data field (== a pointer to a memory
+ buffer) */
+ ulint len2) /* in: data field length or UNIV_SQL_NULL */
+{
+ return(cmp_data_data_slow(cur_type, data1, len1, data2, len2));
+}
+
+/*****************************************************************
+This function is used to compare two dfields where at least the first
+has its data type field set. */
+UNIV_INLINE
+int
+cmp_dfield_dfield(
+/*==============*/
+ /* out: 1, 0, -1, if dfield1 is greater, equal,
+ less than dfield2, respectively */
+ dfield_t* dfield1,/* in: data field; must have type field set */
+ dfield_t* dfield2)/* in: data field */
+{
+ ut_ad(dfield_check_typed(dfield1));
+
+ return(cmp_data_data(dfield_get_type(dfield1),
+ dfield_get_data(dfield1), dfield_get_len(dfield1),
+ dfield_get_data(dfield2), dfield_get_len(dfield2)));
+}
+
+/*****************************************************************
+This function is used to compare two physical records. Only the common
+first fields are compared. */
+UNIV_INLINE
+int
+cmp_rec_rec(
+/*========*/
+ /* out: 1, 0 , -1 if rec1 is greater, equal,
+ less, respectively, than rec2; only the common
+ first fields are compared */
+ rec_t* rec1, /* in: physical record */
+ rec_t* rec2, /* in: physical record */
+ dict_index_t* index) /* in: data dictionary index */
+{
+ ulint match_f = 0;
+ ulint match_b = 0;
+
+ return(cmp_rec_rec_with_match(rec1, rec2, index, &match_f, &match_b));
+}
diff --git a/innobase/include/rem0rec.h b/innobase/include/rem0rec.h
new file mode 100644
index 00000000000..62c0aa14519
--- /dev/null
+++ b/innobase/include/rem0rec.h
@@ -0,0 +1,357 @@
+/************************************************************************
+Record manager
+
+(c) 1994-1996 Innobase Oy
+
+Created 5/30/1994 Heikki Tuuri
+*************************************************************************/
+
+#ifndef rem0rec_h
+#define rem0rec_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "rem0types.h"
+
+/* Maximum values for various fields (for non-blob tuples) */
+#define REC_MAX_N_FIELDS (1024 - 1)
+#define REC_MAX_HEAP_NO (2 * 8192 - 1)
+#define REC_MAX_N_OWNED (16 - 1)
+
+/* Flag denoting the predefined minimum record: this bit is ORed in the 4
+info bits of a record */
+#define REC_INFO_MIN_REC_FLAG 0x10
+
+/* Number of extra bytes in a record, in addition to the data and the
+offsets */
+#define REC_N_EXTRA_BYTES 6
+
+/**********************************************************
+The following function is used to get the offset of the
+next chained record on the same page. */
+UNIV_INLINE
+ulint
+rec_get_next_offs(
+/*==============*/
+ /* out: the page offset of the next
+ chained record */
+ rec_t* rec); /* in: physical record */
+/**********************************************************
+The following function is used to set the next record offset field
+of the record. */
+UNIV_INLINE
+void
+rec_set_next_offs(
+/*==============*/
+ rec_t* rec, /* in: physical record */
+ ulint next); /* in: offset of the next record */
+/**********************************************************
+The following function is used to get the number of fields
+in the record. */
+UNIV_INLINE
+ulint
+rec_get_n_fields(
+/*=============*/
+ /* out: number of data fields */
+ rec_t* rec); /* in: physical record */
+/**********************************************************
+The following function is used to get the number of records
+owned by the previous directory record. */
+UNIV_INLINE
+ulint
+rec_get_n_owned(
+/*============*/
+ /* out: number of owned records */
+ rec_t* rec); /* in: physical record */
+/**********************************************************
+The following function is used to set the number of owned
+records. */
+UNIV_INLINE
+void
+rec_set_n_owned(
+/*============*/
+ rec_t* rec, /* in: physical record */
+ ulint n_owned); /* in: the number of owned */
+ ulint n_owned); /* in: the number of owned records */
+The following function is used to retrieve the info bits of
+a record. */
+UNIV_INLINE
+ulint
+rec_get_info_bits(
+/*==============*/
+ /* out: info bits */
+ rec_t* rec); /* in: physical record */
+/**********************************************************
+The following function is used to set the info bits of a record. */
+UNIV_INLINE
+void
+rec_set_info_bits(
+/*==============*/
+ rec_t* rec, /* in: physical record */
+ ulint bits); /* in: info bits */
+/**********************************************************
+Gets the value of the deleted flag in info bits. */
+UNIV_INLINE
+ibool
+rec_info_bits_get_deleted_flag(
+/*===========================*/
+ /* out: TRUE if deleted flag set */
+ ulint info_bits); /* in: info bits from a record */
+/**********************************************************
+The following function tells if record is delete marked. */
+UNIV_INLINE
+ibool
+rec_get_deleted_flag(
+/*=================*/
+ /* out: TRUE if delete marked */
+ rec_t* rec); /* in: physical record */
+/**********************************************************
+The following function is used to set the deleted bit. */
+UNIV_INLINE
+void
+rec_set_deleted_flag(
+/*=================*/
+ rec_t* rec, /* in: physical record */
+ ibool flag); /* in: TRUE if delete marked */
+/**********************************************************
+The following function is used to get the order number
+of the record in the heap of the index page. */
+UNIV_INLINE
+ulint
+rec_get_heap_no(
+/*=============*/
+ /* out: heap order number */
+ rec_t* rec); /* in: physical record */
+/**********************************************************
+The following function is used to set the heap number
+field in the record. */
+UNIV_INLINE
+void
+rec_set_heap_no(
+/*=============*/
+ rec_t* rec, /* in: physical record */
+ ulint heap_no);/* in: the heap number */
+/**********************************************************
+The following function is used to test whether the data offsets
+in the record are stored in one-byte or two-byte format. */
+UNIV_INLINE
+ibool
+rec_get_1byte_offs_flag(
+/*====================*/
+ /* out: TRUE if 1-byte form */
+ rec_t* rec); /* in: physical record */
+/****************************************************************
+The following function is used to get a pointer to the nth
+data field in the record. */
+
+byte*
+rec_get_nth_field(
+/*==============*/
+ /* out: pointer to the field, NULL if SQL null */
+ rec_t* rec, /* in: record */
+ ulint n, /* in: index of the field */
+ ulint* len); /* out: length of the field; UNIV_SQL_NULL
+ if SQL null */
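+/* Illustration only, not part of the original file: a typical way to read
+the nth field and detect SQL null, based on the declaration above.
+
+	ulint	len;
+	byte*	field = rec_get_nth_field(rec, n, &len);
+
+	if (len == UNIV_SQL_NULL) {
+		... the field is SQL null, and field == NULL ...
+	}
+*/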
+/****************************************************************
+Gets the physical size of a field. Note that even an SQL null may have a
+field of size > 0, if the data type is of a fixed size. */
+UNIV_INLINE
+ulint
+rec_get_nth_field_size(
+/*===================*/
+ /* out: field size in bytes */
+ rec_t* rec, /* in: record */
+ ulint n); /* in: index of the field */
+/****************************************************************
+The following function is used to get a copy of the nth
+data field in the record to a buffer. */
+UNIV_INLINE
+void
+rec_copy_nth_field(
+/*===============*/
+ void* buf, /* in: pointer to the buffer */
+ rec_t* rec, /* in: record */
+ ulint n, /* in: index of the field */
+ ulint* len); /* out: length of the field; UNIV_SQL_NULL if SQL
+ null */
+/***************************************************************
+This is used to modify the value of an already existing field in
+a physical record. The previous value must have exactly the same
+size as the new value. If len is UNIV_SQL_NULL then the field is
+treated as SQL null. */
+UNIV_INLINE
+void
+rec_set_nth_field(
+/*==============*/
+ rec_t* rec, /* in: record */
+ ulint n, /* in: index of the field */
+ void* data, /* in: pointer to the data if not SQL null */
+ ulint len); /* in: length of the data or UNIV_SQL_NULL.
+ If not SQL null, must have the same length as the
+ previous value. If SQL null, previous value must be
+ SQL null. */
+/**************************************************************
+The following function returns the data size of a physical
+record, that is the sum of field lengths. SQL null fields
+are counted as length 0 fields. The value returned by the function
+is the distance from record origin to record end in bytes. */
+UNIV_INLINE
+ulint
+rec_get_data_size(
+/*==============*/
+ /* out: size */
+ rec_t* rec); /* in: physical record */
+/**************************************************************
+Returns the total size of record minus data size of record.
+The value returned by the function is the distance from record
+start to record origin in bytes. */
+UNIV_INLINE
+ulint
+rec_get_extra_size(
+/*===============*/
+ /* out: size */
+ rec_t* rec); /* in: physical record */
+/**************************************************************
+Returns the total size of a physical record. */
+UNIV_INLINE
+ulint
+rec_get_size(
+/*=========*/
+ /* out: size */
+ rec_t* rec); /* in: physical record */
+/**************************************************************
+Returns a pointer to the start of the record. */
+UNIV_INLINE
+byte*
+rec_get_start(
+/*==========*/
+ /* out: pointer to start */
+ rec_t* rec); /* in: pointer to record */
+/**************************************************************
+Returns a pointer to the end of the record. */
+UNIV_INLINE
+byte*
+rec_get_end(
+/*========*/
+ /* out: pointer to end */
+ rec_t* rec); /* in: pointer to record */
+/*******************************************************************
+Copies a physical record to a buffer. */
+UNIV_INLINE
+rec_t*
+rec_copy(
+/*=====*/
+ /* out: pointer to the origin of the copied record */
+ void* buf, /* in: buffer */
+ rec_t* rec); /* in: physical record */
+/******************************************************************
+Copies the first n fields of a physical record to a new physical record in
+a buffer. */
+
+rec_t*
+rec_copy_prefix_to_buf(
+/*===================*/
+ /* out, own: copied record */
+ rec_t* rec, /* in: physical record */
+ ulint n_fields, /* in: number of fields to copy */
+ byte** buf, /* in/out: memory buffer for the copied prefix,
+ or NULL */
+ ulint* buf_size); /* in/out: buffer size */
+/****************************************************************
+Folds a prefix of a physical record to a ulint. */
+UNIV_INLINE
+ulint
+rec_fold(
+/*=====*/
+ /* out: the folded value */
+ rec_t* rec, /* in: the physical record */
+ ulint n_fields, /* in: number of complete fields to fold */
+ ulint n_bytes, /* in: number of bytes to fold in an
+ incomplete last field */
+ dulint tree_id); /* in: index tree id */
+/*************************************************************
+Builds a physical record out of a data tuple and stores it beginning from
+address destination. */
+UNIV_INLINE
+rec_t*
+rec_convert_dtuple_to_rec(
+/*======================*/
+ /* out: pointer to the origin of physical
+ record */
+ byte* destination, /* in: start address of the physical record */
+ dtuple_t* dtuple); /* in: data tuple */
+/*************************************************************
+Builds a physical record out of a data tuple and stores it beginning from
+address destination. */
+
+rec_t*
+rec_convert_dtuple_to_rec_low(
+/*==========================*/
+ /* out: pointer to the origin of physical
+ record */
+ byte* destination, /* in: start address of the physical record */
+ dtuple_t* dtuple, /* in: data tuple */
+ ulint data_size); /* in: data size of dtuple */
+/**************************************************************
+Returns the extra size of a physical record if we know its
+data size and number of fields. */
+UNIV_INLINE
+ulint
+rec_get_converted_extra_size(
+/*=========================*/
+ /* out: extra size */
+ ulint data_size, /* in: data size */
+ ulint n_fields); /* in: number of fields */
+/**************************************************************
+The following function returns the size of a data tuple when converted to
+a physical record. */
+UNIV_INLINE
+ulint
+rec_get_converted_size(
+/*===================*/
+ /* out: size */
+ dtuple_t* dtuple);/* in: data tuple */
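+/* A minimal usage sketch, not part of the original interface: converts a
+data tuple to a physical record in a caller-provided buffer; ut_a() is the
+usual InnoBase assertion macro, and error handling is otherwise omitted. */
+#if 0
+static rec_t*
+rec_convert_example(dtuple_t* dtuple, byte* buf, ulint buf_size)
+{
+	/* The whole converted record, extra bytes plus data, must fit */
+	ut_a(rec_get_converted_size(dtuple) <= buf_size);
+
+	/* The returned pointer is the record origin within buf */
+	return(rec_convert_dtuple_to_rec(buf, dtuple));
+}
+#endif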
+/******************************************************************
+Copies the first n fields of a physical record to a data tuple.
+The fields are copied to the memory heap. */
+
+void
+rec_copy_prefix_to_dtuple(
+/*======================*/
+ dtuple_t* tuple, /* in: data tuple */
+ rec_t* rec, /* in: physical record */
+ ulint n_fields, /* in: number of fields to copy */
+ mem_heap_t* heap); /* in: memory heap */
+/*******************************************************************
+Validates the consistency of a physical record. */
+
+ibool
+rec_validate(
+/*=========*/
+ /* out: TRUE if ok */
+ rec_t* rec); /* in: physical record */
+/*******************************************************************
+Prints a physical record. */
+
+void
+rec_print(
+/*======*/
+ rec_t* rec); /* in: physical record */
+/*******************************************************************
+Prints a physical record to a buffer. */
+
+ulint
+rec_sprintf(
+/*========*/
+ /* out: printed length in bytes */
+ char* buf, /* in: buffer to print to */
+ ulint buf_len,/* in: buffer length */
+ rec_t* rec); /* in: physical record */
+
+#define REC_INFO_BITS 6 /* This is a single-byte bit-field */
+
+#ifndef UNIV_NONINL
+#include "rem0rec.ic"
+#endif
+
+#endif
diff --git a/innobase/include/rem0rec.ic b/innobase/include/rem0rec.ic
new file mode 100644
index 00000000000..c63b25374dd
--- /dev/null
+++ b/innobase/include/rem0rec.ic
@@ -0,0 +1,959 @@
+/************************************************************************
+Record manager
+
+(c) 1994-1996 Innobase Oy
+
+Created 5/30/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "mach0data.h"
+#include "ut0byte.h"
+
+/* Offsets of the bit-fields in the record. NOTE! In the table the most
+significant bytes and bits are written below less significant.
+
+ (1) byte offset (2) bit usage within byte
+ downward from
+ origin -> 1 8 bits pointer to next record
+ 2 8 bits pointer to next record
+ 3 1 bit short flag
+ 7 bits number of fields
+ 4 3 bits number of fields
+ 5 bits heap number
+ 5 8 bits heap number
+ 6 4 bits n_owned
+ 4 bits info bits
+*/
+
+
+/* Maximum lengths for the data in a physical record if the offsets
+are given in one-byte (resp. two-byte) format. */
+#define REC_1BYTE_OFFS_LIMIT 0x7F
+#define REC_2BYTE_OFFS_LIMIT 0x7FFF
+
+/* We list the byte offsets from the origin of the record, the mask,
+and the shift needed to obtain each bit-field of the record. */
+
+#define REC_NEXT 2
+#define REC_NEXT_MASK 0xFFFF
+#define REC_NEXT_SHIFT 0
+
+#define REC_SHORT 3 /* This is a single-byte bit-field */
+#define REC_SHORT_MASK 0x1
+#define REC_SHORT_SHIFT 0
+
+#define REC_N_FIELDS 4
+#define REC_N_FIELDS_MASK 0x7FE
+#define REC_N_FIELDS_SHIFT 1
+
+#define REC_HEAP_NO 5
+#define REC_HEAP_NO_MASK 0xFFF8
+#define REC_HEAP_NO_SHIFT 3
+
+#define REC_N_OWNED 6 /* This is a single-byte bit-field */
+#define REC_N_OWNED_MASK 0xF
+#define REC_N_OWNED_SHIFT 0
+
+#define REC_INFO_BITS_MASK 0xF0
+#define REC_INFO_BITS_SHIFT 0
+
+/* The deleted flag in info bits */
+#define REC_INFO_DELETED_FLAG 0x20 /* when bit is set to 1, it means the
+ record has been delete marked */
+/* The following masks are used to filter the SQL null bit from
+one-byte and two-byte offsets */
+
+#define REC_1BYTE_SQL_NULL_MASK 0x80
+#define REC_2BYTE_SQL_NULL_MASK 0x8000
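+
+/**********************************************************
+Editor's note: an illustrative sketch, not part of the original Innobase
+source. It shows how a mask/shift pair above addresses one bit-field: the
+heap number, for example, occupies the masked bits of the two bytes at
+offsets 4 and 5 below the record origin, which are read and written in
+big-endian order. The helper name is hypothetical. */
+#if 0
+UNIV_INLINE
+void
+rec_heap_no_example(
+/*================*/
+	rec_t*	rec)	/* in: physical record */
+{
+	ulint	two_bytes;
+
+	/* Store heap number 5: clear the masked bits and OR in the
+	shifted value */
+	two_bytes = mach_read_from_2(rec - REC_HEAP_NO);
+	two_bytes = (two_bytes & ~REC_HEAP_NO_MASK)
+					| (5 << REC_HEAP_NO_SHIFT);
+	mach_write_to_2(rec - REC_HEAP_NO, two_bytes);
+
+	/* Read it back: mask first, then shift right, exactly as
+	rec_get_bit_field_2 below does */
+	ut_a(5 == ((mach_read_from_2(rec - REC_HEAP_NO)
+			& REC_HEAP_NO_MASK) >> REC_HEAP_NO_SHIFT));
+}
+#endif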
+
+/***************************************************************
+Sets the value of the ith field SQL null bit. */
+
+void
+rec_set_nth_field_null_bit(
+/*=======================*/
+ rec_t* rec, /* in: record */
+ ulint i, /* in: ith field */
+ ibool val); /* in: value to set */
+/***************************************************************
+Sets a record field to SQL null. The physical size of the field is not
+changed. */
+
+void
+rec_set_nth_field_sql_null(
+/*=======================*/
+ rec_t* rec, /* in: record */
+ ulint n); /* in: index of the field */
+
+/**********************************************************
+Gets a bit field from within 1 byte. */
+UNIV_INLINE
+ulint
+rec_get_bit_field_1(
+/*================*/
+ rec_t* rec, /* in: pointer to record origin */
+ ulint offs, /* in: offset from the origin down */
+ ulint mask, /* in: mask used to filter bits */
+ ulint shift) /* in: shift right applied after masking */
+{
+ ut_ad(rec);
+
+ return((mach_read_from_1(rec - offs) & mask) >> shift);
+}
+
+/**********************************************************
+Sets a bit field within 1 byte. */
+UNIV_INLINE
+void
+rec_set_bit_field_1(
+/*================*/
+ rec_t* rec, /* in: pointer to record origin */
+ ulint val, /* in: value to set */
+ ulint offs, /* in: offset from the origin down */
+ ulint mask, /* in: mask used to filter bits */
+ ulint shift) /* in: shift right applied after masking */
+{
+ ut_ad(rec);
+ ut_ad(offs <= REC_N_EXTRA_BYTES);
+ ut_ad(mask);
+ ut_ad(mask <= 0xFF);
+ ut_ad(((mask >> shift) << shift) == mask);
+ ut_ad(((val << shift) & mask) == (val << shift));
+
+ mach_write_to_1(rec - offs,
+ (mach_read_from_1(rec - offs) & ~mask)
+ | (val << shift));
+}
+
+/**********************************************************
+Gets a bit field from within 2 bytes. */
+UNIV_INLINE
+ulint
+rec_get_bit_field_2(
+/*================*/
+ rec_t* rec, /* in: pointer to record origin */
+ ulint offs, /* in: offset from the origin down */
+ ulint mask, /* in: mask used to filter bits */
+ ulint shift) /* in: shift right applied after masking */
+{
+ ut_ad(rec);
+
+ return((mach_read_from_2(rec - offs) & mask) >> shift);
+}
+
+/**********************************************************
+Sets a bit field within 2 bytes. */
+UNIV_INLINE
+void
+rec_set_bit_field_2(
+/*================*/
+ rec_t* rec, /* in: pointer to record origin */
+ ulint val, /* in: value to set */
+ ulint offs, /* in: offset from the origin down */
+ ulint mask, /* in: mask used to filter bits */
+ ulint shift) /* in: shift right applied after masking */
+{
+ ut_ad(rec);
+ ut_ad(offs <= REC_N_EXTRA_BYTES);
+ ut_ad(mask > 0xFF);
+ ut_ad(mask <= 0xFFFF);
+ ut_ad((mask >> shift) & 1);
+ ut_ad(0 == ((mask >> shift) & ((mask >> shift) + 1)));
+ ut_ad(((mask >> shift) << shift) == mask);
+ ut_ad(((val << shift) & mask) == (val << shift));
+#ifdef UNIV_DEBUG
+ {
+ ulint m;
+
+ /* The following assertion checks that the masks of currently
+ defined bit-fields in bytes 3-6 do not overlap. */
+ m = (ulint)((REC_SHORT_MASK << (8 * (REC_SHORT - 3)))
+ + (REC_N_FIELDS_MASK << (8 * (REC_N_FIELDS - 4)))
+ + (REC_HEAP_NO_MASK << (8 * (REC_HEAP_NO - 4)))
+ + (REC_N_OWNED_MASK << (8 * (REC_N_OWNED - 3)))
+ + (REC_INFO_BITS_MASK << (8 * (REC_INFO_BITS - 3))));
+ if (m != ut_dbg_zero + 0xFFFFFFFF) {
+ printf("Sum of masks %lx\n", m);
+ ut_error;
+ }
+ }
+#endif
+ mach_write_to_2(rec - offs,
+ (mach_read_from_2(rec - offs) & ~mask)
+ | (val << shift));
+}
+
+/**********************************************************
+The following function is used to get the offset of the next chained record
+on the same page. */
+UNIV_INLINE
+ulint
+rec_get_next_offs(
+/*==============*/
+ /* out: the page offset of the next chained record */
+ rec_t* rec) /* in: physical record */
+{
+ ulint ret;
+
+ ut_ad(rec);
+
+ ret = rec_get_bit_field_2(rec, REC_NEXT, REC_NEXT_MASK,
+ REC_NEXT_SHIFT);
+ ut_ad(ret < UNIV_PAGE_SIZE);
+
+ return(ret);
+}
+
+/**********************************************************
+The following function is used to set the next record offset field of the
+record. */
+UNIV_INLINE
+void
+rec_set_next_offs(
+/*==============*/
+ rec_t* rec, /* in: physical record */
+ ulint next) /* in: offset of the next record */
+{
+ ut_ad(rec);
+ ut_ad(UNIV_PAGE_SIZE > next);
+
+ rec_set_bit_field_2(rec, next, REC_NEXT, REC_NEXT_MASK,
+ REC_NEXT_SHIFT);
+}
+
+/**********************************************************
+The following function is used to get the number of fields in the record. */
+UNIV_INLINE
+ulint
+rec_get_n_fields(
+/*=============*/
+ /* out: number of data fields */
+ rec_t* rec) /* in: physical record */
+{
+ ulint ret;
+
+ ut_ad(rec);
+
+ ret = rec_get_bit_field_2(rec, REC_N_FIELDS, REC_N_FIELDS_MASK,
+ REC_N_FIELDS_SHIFT);
+ ut_ad(ret <= REC_MAX_N_FIELDS);
+ ut_ad(ret > 0);
+
+ return(ret);
+}
+
+/**********************************************************
+The following function is used to set the number of fields field in the
+record. */
+UNIV_INLINE
+void
+rec_set_n_fields(
+/*=============*/
+ rec_t* rec, /* in: physical record */
+ ulint n_fields) /* in: the number of fields */
+{
+ ut_ad(rec);
+ ut_ad(n_fields <= REC_MAX_N_FIELDS);
+ ut_ad(n_fields > 0);
+
+ rec_set_bit_field_2(rec, n_fields, REC_N_FIELDS, REC_N_FIELDS_MASK,
+ REC_N_FIELDS_SHIFT);
+}
+
+/**********************************************************
+The following function is used to get the number of records owned by the
+previous directory record. */
+UNIV_INLINE
+ulint
+rec_get_n_owned(
+/*============*/
+ /* out: number of owned records */
+ rec_t* rec) /* in: physical record */
+{
+ ulint ret;
+
+ ut_ad(rec);
+
+ ret = rec_get_bit_field_1(rec, REC_N_OWNED, REC_N_OWNED_MASK,
+ REC_N_OWNED_SHIFT);
+ ut_ad(ret <= REC_MAX_N_OWNED);
+
+ return(ret);
+}
+
+/**********************************************************
+The following function is used to set the number of owned records. */
+UNIV_INLINE
+void
+rec_set_n_owned(
+/*============*/
+ rec_t* rec, /* in: physical record */
+ ulint n_owned) /* in: the number of owned */
+{
+ ut_ad(rec);
+ ut_ad(n_owned <= REC_MAX_N_OWNED);
+
+ rec_set_bit_field_1(rec, n_owned, REC_N_OWNED, REC_N_OWNED_MASK,
+ REC_N_OWNED_SHIFT);
+}
+
+/**********************************************************
+The following function is used to retrieve the info bits of a record. */
+UNIV_INLINE
+ulint
+rec_get_info_bits(
+/*==============*/
+ /* out: info bits */
+ rec_t* rec) /* in: physical record */
+{
+ ulint ret;
+
+ ut_ad(rec);
+
+ ret = rec_get_bit_field_1(rec, REC_INFO_BITS, REC_INFO_BITS_MASK,
+ REC_INFO_BITS_SHIFT);
+ ut_ad((ret & ~REC_INFO_BITS_MASK) == 0);
+
+ return(ret);
+}
+
+/**********************************************************
+The following function is used to set the info bits of a record. */
+UNIV_INLINE
+void
+rec_set_info_bits(
+/*==============*/
+ rec_t* rec, /* in: physical record */
+ ulint bits) /* in: info bits */
+{
+ ut_ad(rec);
+ ut_ad((bits & ~REC_INFO_BITS_MASK) == 0);
+
+ rec_set_bit_field_1(rec, bits, REC_INFO_BITS, REC_INFO_BITS_MASK,
+ REC_INFO_BITS_SHIFT);
+}
+
+/**********************************************************
+Gets the value of the deleted flag in info bits. */
+UNIV_INLINE
+ibool
+rec_info_bits_get_deleted_flag(
+/*===========================*/
+ /* out: TRUE if deleted flag set */
+ ulint info_bits) /* in: info bits from a record */
+{
+ if (info_bits & REC_INFO_DELETED_FLAG) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/**********************************************************
+The following function tells if record is delete marked. */
+UNIV_INLINE
+ibool
+rec_get_deleted_flag(
+/*=================*/
+ /* out: TRUE if delete marked */
+ rec_t* rec) /* in: physical record */
+{
+ if (REC_INFO_DELETED_FLAG & rec_get_info_bits(rec)) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/**********************************************************
+The following function is used to set the deleted bit. */
+UNIV_INLINE
+void
+rec_set_deleted_flag(
+/*=================*/
+ rec_t* rec, /* in: physical record */
+ ibool flag) /* in: TRUE if delete marked */
+{
+ ulint old_val;
+ ulint new_val;
+
+ ut_ad(TRUE == 1);
+ ut_ad(flag <= TRUE);
+
+ old_val = rec_get_info_bits(rec);
+
+ if (flag) {
+ new_val = REC_INFO_DELETED_FLAG | old_val;
+ } else {
+ new_val = ~REC_INFO_DELETED_FLAG & old_val;
+ }
+
+ rec_set_info_bits(rec, new_val);
+}
+
+/**********************************************************
+The following function is used to get the order number of the record in the
+heap of the index page. */
+UNIV_INLINE
+ulint
+rec_get_heap_no(
+/*=============*/
+ /* out: heap order number */
+ rec_t* rec) /* in: physical record */
+{
+ ulint ret;
+
+ ut_ad(rec);
+
+ ret = rec_get_bit_field_2(rec, REC_HEAP_NO, REC_HEAP_NO_MASK,
+ REC_HEAP_NO_SHIFT);
+ ut_ad(ret <= REC_MAX_HEAP_NO);
+
+ return(ret);
+}
+
+/**********************************************************
+The following function is used to set the heap number field in the record. */
+UNIV_INLINE
+void
+rec_set_heap_no(
+/*=============*/
+ rec_t* rec, /* in: physical record */
+ ulint heap_no)/* in: the heap number */
+{
+ ut_ad(heap_no <= REC_MAX_HEAP_NO);
+
+ rec_set_bit_field_2(rec, heap_no, REC_HEAP_NO, REC_HEAP_NO_MASK,
+ REC_HEAP_NO_SHIFT);
+}
+
+/**********************************************************
+The following function is used to test whether the data offsets in the record
+are stored in one-byte or two-byte format. */
+UNIV_INLINE
+ibool
+rec_get_1byte_offs_flag(
+/*====================*/
+ /* out: TRUE if 1-byte form */
+ rec_t* rec) /* in: physical record */
+{
+ ut_ad(TRUE == 1);
+
+ return(rec_get_bit_field_1(rec, REC_SHORT, REC_SHORT_MASK,
+ REC_SHORT_SHIFT));
+}
+
+/**********************************************************
+The following function is used to set the 1-byte offsets flag. */
+UNIV_INLINE
+void
+rec_set_1byte_offs_flag(
+/*====================*/
+ rec_t* rec, /* in: physical record */
+ ibool flag) /* in: TRUE if 1byte form */
+{
+ ut_ad(TRUE == 1);
+ ut_ad(flag <= TRUE);
+
+ rec_set_bit_field_1(rec, flag, REC_SHORT, REC_SHORT_MASK,
+ REC_SHORT_SHIFT);
+}
+
+/**********************************************************
+Returns the offset of nth field end if the record is stored in the 1-byte
+offsets form. If the field is SQL null, the flag is ORed in the returned
+value. */
+UNIV_INLINE
+ulint
+rec_1_get_field_end_info(
+/*=====================*/
+ /* out: offset of the start of the field, SQL null
+ flag ORed */
+ rec_t* rec, /* in: record */
+ ulint n) /* in: field index */
+{
+ ut_ad(rec_get_1byte_offs_flag(rec));
+ ut_ad(n < rec_get_n_fields(rec));
+
+ return(mach_read_from_1(rec - (REC_N_EXTRA_BYTES + n + 1)));
+}
+
+/**********************************************************
+Returns the offset of nth field end if the record is stored in the 2-byte
+offsets form. If the field is SQL null, the flag is ORed in the returned
+value. */
+UNIV_INLINE
+ulint
+rec_2_get_field_end_info(
+/*=====================*/
+ /* out: offset of the start of the field, SQL null
+ flag ORed */
+ rec_t* rec, /* in: record */
+ ulint n) /* in: field index */
+{
+ ut_ad(!rec_get_1byte_offs_flag(rec));
+ ut_ad(n < rec_get_n_fields(rec));
+
+ return(mach_read_from_2(rec - (REC_N_EXTRA_BYTES + 2 * n + 2)));
+}
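+
+/**********************************************************
+Editor's note: a worked example, not part of the original Innobase source.
+In the 1-byte offsets form the end offset of each field is stored in a
+single byte directly below the REC_N_EXTRA_BYTES fixed extra bytes, in
+field order going downwards from the origin. The offsets are cumulative
+and counted from the record origin: a record with two fields of lengths
+4 and 6 stores the byte 4 at rec - (REC_N_EXTRA_BYTES + 1) and the byte
+10 at rec - (REC_N_EXTRA_BYTES + 2). If a field is SQL null,
+REC_1BYTE_SQL_NULL_MASK is ORed into its offset byte. The 2-byte form
+works the same way with two bytes per field and REC_2BYTE_SQL_NULL_MASK. */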
+
+/**********************************************************
+Returns the offset of the (n - 1)th field end if the record is stored in the
+1-byte offsets form. If the field is SQL null, the flag is ORed in the
+returned value. This function and the 2-byte counterpart are defined here
+because the C compiler was not able to sum negative and positive constant
+offsets, and warned of constant arithmetic overflow within the compiler. */
+UNIV_INLINE
+ulint
+rec_1_get_prev_field_end_info(
+/*==========================*/
+ /* out: offset of the start of the PREVIOUS field, SQL
+ null flag ORed */
+ rec_t* rec, /* in: record */
+ ulint n) /* in: field index */
+{
+ ut_ad(rec_get_1byte_offs_flag(rec));
+ ut_ad(n <= rec_get_n_fields(rec));
+
+ return(mach_read_from_1(rec - (REC_N_EXTRA_BYTES + n)));
+}
+
+/**********************************************************
+Returns the offset of the (n - 1)th field end if the record is stored in the
+2-byte offsets form. If the field is SQL null, the flag is ORed in the
+returned value. */
+UNIV_INLINE
+ulint
+rec_2_get_prev_field_end_info(
+/*==========================*/
+ /* out: offset of the start of the PREVIOUS field, SQL
+ null flag ORed */
+ rec_t* rec, /* in: record */
+ ulint n) /* in: field index */
+{
+ ut_ad(!rec_get_1byte_offs_flag(rec));
+ ut_ad(n <= rec_get_n_fields(rec));
+
+ return(mach_read_from_2(rec - (REC_N_EXTRA_BYTES + 2 * n)));
+}
+
+/**********************************************************
+Sets the field end info for the nth field if the record is stored in the
+1-byte format. */
+UNIV_INLINE
+void
+rec_1_set_field_end_info(
+/*=====================*/
+ rec_t* rec, /* in: record */
+ ulint n, /* in: field index */
+ ulint info) /* in: value to set */
+{
+ ut_ad(rec_get_1byte_offs_flag(rec));
+ ut_ad(n < rec_get_n_fields(rec));
+
+ mach_write_to_1(rec - (REC_N_EXTRA_BYTES + n + 1), info);
+}
+
+/**********************************************************
+Sets the field end info for the nth field if the record is stored in the
+2-byte format. */
+UNIV_INLINE
+void
+rec_2_set_field_end_info(
+/*=====================*/
+ rec_t* rec, /* in: record */
+ ulint n, /* in: field index */
+ ulint info) /* in: value to set */
+{
+ ut_ad(!rec_get_1byte_offs_flag(rec));
+ ut_ad(n < rec_get_n_fields(rec));
+
+ mach_write_to_2(rec - (REC_N_EXTRA_BYTES + 2 * n + 2), info);
+}
+
+/**********************************************************
+Returns the offset of nth field start if the record is stored in the 1-byte
+offsets form. */
+UNIV_INLINE
+ulint
+rec_1_get_field_start_offs(
+/*=======================*/
+ /* out: offset of the start of the field */
+ rec_t* rec, /* in: record */
+ ulint n) /* in: field index */
+{
+ ut_ad(rec_get_1byte_offs_flag(rec));
+ ut_ad(n <= rec_get_n_fields(rec));
+
+ if (n == 0) {
+
+ return(0);
+ }
+
+ return(rec_1_get_prev_field_end_info(rec, n)
+ & ~REC_1BYTE_SQL_NULL_MASK);
+}
+
+/**********************************************************
+Returns the offset of nth field start if the record is stored in the 2-byte
+offsets form. */
+UNIV_INLINE
+ulint
+rec_2_get_field_start_offs(
+/*=======================*/
+ /* out: offset of the start of the field */
+ rec_t* rec, /* in: record */
+ ulint n) /* in: field index */
+{
+ ut_ad(!rec_get_1byte_offs_flag(rec));
+ ut_ad(n <= rec_get_n_fields(rec));
+
+ if (n == 0) {
+
+ return(0);
+ }
+
+ return(rec_2_get_prev_field_end_info(rec, n)
+ & ~REC_2BYTE_SQL_NULL_MASK);
+}
+
+/**********************************************************
+The following function is used to read the offset of the start of a data field
+in the record. The start of an SQL null field is the end offset of the
+previous non-null field, or 0, if none exists. If n is the number of the last
+field + 1, then the end offset of the last field is returned. */
+UNIV_INLINE
+ulint
+rec_get_field_start_offs(
+/*=====================*/
+ /* out: offset of the start of the field */
+ rec_t* rec, /* in: record */
+ ulint n) /* in: field index */
+{
+ ut_ad(rec);
+ ut_ad(n <= rec_get_n_fields(rec));
+
+ if (n == 0) {
+
+ return(0);
+ }
+
+ if (rec_get_1byte_offs_flag(rec)) {
+
+ return(rec_1_get_field_start_offs(rec, n));
+ }
+
+ return(rec_2_get_field_start_offs(rec, n));
+}
+
+/****************************************************************
+Gets the physical size of a field. Note that an SQL null field may also have
+a size > 0, if the data type is of a fixed size. */
+UNIV_INLINE
+ulint
+rec_get_nth_field_size(
+/*===================*/
+ /* out: field size in bytes */
+ rec_t* rec, /* in: record */
+ ulint n) /* in: index of the field */
+{
+ ulint os;
+ ulint next_os;
+
+ os = rec_get_field_start_offs(rec, n);
+ next_os = rec_get_field_start_offs(rec, n + 1);
+
+ ut_ad(next_os - os < UNIV_PAGE_SIZE);
+
+ return(next_os - os);
+}
+
+/****************************************************************
+The following function is used to get a copy of the nth data field in a
+record to a buffer. */
+UNIV_INLINE
+void
+rec_copy_nth_field(
+/*===============*/
+ void* buf, /* in: pointer to the buffer */
+ rec_t* rec, /* in: record */
+ ulint n, /* in: index of the field */
+ ulint* len) /* out: length of the field; UNIV_SQL_NULL if SQL
+ null */
+{
+ byte* ptr;
+
+ ut_ad(buf && rec && len);
+
+ ptr = rec_get_nth_field(rec, n, len);
+
+ if (*len == UNIV_SQL_NULL) {
+
+ return;
+ }
+
+ ut_memcpy(buf, ptr, *len);
+}
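+
+/* Editor's note, not part of the original Innobase source: the buffer
+passed to rec_copy_nth_field must have room for at least
+rec_get_nth_field_size(rec, n) bytes; for an SQL null field nothing is
+copied and *len is set to UNIV_SQL_NULL. */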
+
+/***************************************************************
+This is used to modify the value of an already existing field in a record.
+The previous value must have exactly the same size as the new value. If len
+is UNIV_SQL_NULL then the field is treated as an SQL null. */
+UNIV_INLINE
+void
+rec_set_nth_field(
+/*==============*/
+ rec_t* rec, /* in: record */
+ ulint n, /* in: index of the field */
+ void* data, /* in: pointer to the data if not SQL null */
+ ulint len) /* in: length of the data or UNIV_SQL_NULL */
+{
+ byte* data2;
+ ulint len2;
+
+ ut_ad((len == UNIV_SQL_NULL)
+ || (rec_get_nth_field_size(rec, n) == len));
+
+ if (len == UNIV_SQL_NULL) {
+ rec_set_nth_field_sql_null(rec, n);
+
+ return;
+ }
+
+ data2 = rec_get_nth_field(rec, n, &len2);
+
+ ut_memcpy(data2, data, len);
+
+ if (len2 == UNIV_SQL_NULL) {
+
+ rec_set_nth_field_null_bit(rec, n, FALSE);
+ }
+}
+
+/**************************************************************
+The following function returns the data size of a physical
+record, that is the sum of field lengths. SQL null fields
+are counted as length 0 fields. The value returned by the function
+is the distance from record origin to record end in bytes. */
+UNIV_INLINE
+ulint
+rec_get_data_size(
+/*==============*/
+ /* out: size */
+ rec_t* rec) /* in: physical record */
+{
+ ut_ad(rec);
+
+ return(rec_get_field_start_offs(rec, rec_get_n_fields(rec)));
+}
+
+/**************************************************************
+Returns the total size of record minus data size of record. The value
+returned by the function is the distance from record start to record origin
+in bytes. */
+UNIV_INLINE
+ulint
+rec_get_extra_size(
+/*===============*/
+ /* out: size */
+ rec_t* rec) /* in: physical record */
+{
+ ulint n_fields;
+
+ ut_ad(rec);
+
+ n_fields = rec_get_n_fields(rec);
+
+ if (rec_get_1byte_offs_flag(rec)) {
+
+ return(REC_N_EXTRA_BYTES + n_fields);
+ }
+
+ return(REC_N_EXTRA_BYTES + 2 * n_fields);
+}
+
+/**************************************************************
+Returns the total size of a physical record. */
+UNIV_INLINE
+ulint
+rec_get_size(
+/*=========*/
+ /* out: size */
+ rec_t* rec) /* in: physical record */
+{
+ ulint n_fields;
+
+ ut_ad(rec);
+
+ n_fields = rec_get_n_fields(rec);
+
+ if (rec_get_1byte_offs_flag(rec)) {
+
+ return(REC_N_EXTRA_BYTES + n_fields
+ + rec_1_get_field_start_offs(rec, n_fields));
+ }
+
+ return(REC_N_EXTRA_BYTES + 2 * n_fields
+ + rec_2_get_field_start_offs(rec, n_fields));
+}
+
+/**************************************************************
+Returns a pointer to the end of the record. */
+UNIV_INLINE
+byte*
+rec_get_end(
+/*========*/
+ /* out: pointer to end */
+ rec_t* rec) /* in: pointer to record */
+{
+ return(rec + rec_get_data_size(rec));
+}
+
+/**************************************************************
+Returns a pointer to the start of the record. */
+UNIV_INLINE
+byte*
+rec_get_start(
+/*==========*/
+ /* out: pointer to start */
+ rec_t* rec) /* in: pointer to record */
+{
+ return(rec - rec_get_extra_size(rec));
+}
+
+/*******************************************************************
+Copies a physical record to a buffer. */
+UNIV_INLINE
+rec_t*
+rec_copy(
+/*=====*/
+ /* out: pointer to the origin of the copied record */
+ void* buf, /* in: buffer */
+ rec_t* rec) /* in: physical record */
+{
+ ulint extra_len;
+ ulint data_len;
+
+ ut_ad(rec && buf);
+ ut_ad(rec_validate(rec));
+
+ extra_len = rec_get_extra_size(rec);
+ data_len = rec_get_data_size(rec);
+
+ ut_memcpy(buf, rec - extra_len, extra_len + data_len);
+
+ return((byte*)buf + extra_len);
+}
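+
+/**********************************************************
+Editor's note: an illustrative sketch, not part of the original Innobase
+source, showing the typical calling pattern of rec_copy: the buffer must
+hold both the extra bytes and the data bytes, so it is sized with
+rec_get_size. The helper name is hypothetical. */
+#if 0
+UNIV_INLINE
+rec_t*
+rec_copy_to_heap_example(
+/*=====================*/
+				/* out: pointer to the origin of the copy */
+	rec_t*		rec,	/* in: physical record */
+	mem_heap_t*	heap)	/* in: memory heap */
+{
+	byte*	buf;
+
+	buf = mem_heap_alloc(heap, rec_get_size(rec));
+
+	/* The returned origin lies rec_get_extra_size(rec) bytes above
+	buf */
+	return(rec_copy(buf, rec));
+}
+#endif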
+
+/**************************************************************
+Returns the extra size of a physical record if we know its data size and
+the number of fields. */
+UNIV_INLINE
+ulint
+rec_get_converted_extra_size(
+/*=========================*/
+ /* out: extra size */
+ ulint data_size, /* in: data size */
+ ulint n_fields) /* in: number of fields */
+{
+ if (data_size <= REC_1BYTE_OFFS_LIMIT) {
+
+ return(REC_N_EXTRA_BYTES + n_fields);
+ }
+
+ return(REC_N_EXTRA_BYTES + 2 * n_fields);
+}
+
+/**************************************************************
+The following function returns the size of a data tuple when converted to
+a physical record. */
+UNIV_INLINE
+ulint
+rec_get_converted_size(
+/*===================*/
+ /* out: size */
+ dtuple_t* dtuple) /* in: data tuple */
+{
+ ulint data_size;
+ ulint extra_size;
+
+ ut_ad(dtuple);
+ ut_ad(dtuple_check_typed(dtuple));
+
+ data_size = dtuple_get_data_size(dtuple);
+
+ extra_size = rec_get_converted_extra_size(
+ data_size, dtuple_get_n_fields(dtuple));
+
+ return(data_size + extra_size);
+}
+
+/****************************************************************
+Folds a prefix of a physical record to a ulint. */
+UNIV_INLINE
+ulint
+rec_fold(
+/*=====*/
+ /* out: the folded value */
+ rec_t* rec, /* in: the physical record */
+ ulint n_fields, /* in: number of complete fields to fold */
+ ulint n_bytes, /* in: number of bytes to fold in an
+ incomplete last field */
+ dulint tree_id) /* in: index tree id */
+{
+ ulint i;
+ byte* data;
+ ulint len;
+ ulint fold;
+
+ ut_ad(rec_validate(rec));
+ ut_ad(n_fields <= rec_get_n_fields(rec));
+ ut_ad((n_fields < rec_get_n_fields(rec)) || (n_bytes == 0));
+ ut_ad(n_fields + n_bytes > 0);
+ /* Only the page supremum and infimum records have 1 field: */
+ ut_ad(rec_get_n_fields(rec) > 1);
+
+ fold = ut_fold_dulint(tree_id);
+
+ for (i = 0; i < n_fields; i++) {
+ data = rec_get_nth_field(rec, i, &len);
+
+ if (len != UNIV_SQL_NULL) {
+ fold = ut_fold_ulint_pair(fold,
+ ut_fold_binary(data, len));
+ }
+ }
+
+ if (n_bytes > 0) {
+ data = rec_get_nth_field(rec, i, &len);
+
+ if (len != UNIV_SQL_NULL) {
+ if (len > n_bytes) {
+ len = n_bytes;
+ }
+
+ fold = ut_fold_ulint_pair(fold,
+ ut_fold_binary(data, len));
+ }
+ }
+
+ return(fold);
+}
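+
+/* Editor's note, not part of the original Innobase source: to the
+editor's understanding this fold value is what the adaptive hash index
+code (btr0sea, ha0ha) uses as the hash of an index record; folding in
+the tree id keeps identical key prefixes in different indexes from
+systematically hashing to the same value. */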
+
+/*************************************************************
+Builds a physical record out of a data tuple and stores it beginning from
+the address destination. */
+UNIV_INLINE
+rec_t*
+rec_convert_dtuple_to_rec(
+/*======================*/
+ /* out: pointer to the origin of physical
+ record */
+ byte* destination, /* in: start address of the physical record */
+ dtuple_t* dtuple) /* in: data tuple */
+{
+ return(rec_convert_dtuple_to_rec_low(destination, dtuple,
+ dtuple_get_data_size(dtuple)));
+}
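+
+/**********************************************************
+Editor's note: an illustrative sketch, not part of the original Innobase
+source, showing the typical calling pattern of rec_convert_dtuple_to_rec:
+the destination buffer must hold the whole converted record, extra bytes
+included, so it is sized with rec_get_converted_size. The helper name is
+hypothetical. */
+#if 0
+UNIV_INLINE
+rec_t*
+rec_convert_dtuple_to_heap_example(
+/*===============================*/
+				/* out: pointer to the record origin */
+	dtuple_t*	dtuple,	/* in: data tuple */
+	mem_heap_t*	heap)	/* in: memory heap */
+{
+	byte*	buf;
+
+	buf = mem_heap_alloc(heap, rec_get_converted_size(dtuple));
+
+	return(rec_convert_dtuple_to_rec(buf, dtuple));
+}
+#endif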
diff --git a/innobase/include/rem0types.h b/innobase/include/rem0types.h
new file mode 100644
index 00000000000..94c394499c5
--- /dev/null
+++ b/innobase/include/rem0types.h
@@ -0,0 +1,16 @@
+/************************************************************************
+Record manager global types
+
+(c) 1994-1996 Innobase Oy
+
+Created 5/30/1994 Heikki Tuuri
+*************************************************************************/
+
+#ifndef rem0types_h
+#define rem0types_h
+
+/* We define the physical record simply as an array of bytes */
+typedef byte rec_t;
+
+
+#endif
diff --git a/innobase/include/row0ins.h b/innobase/include/row0ins.h
new file mode 100644
index 00000000000..94b0e8dec37
--- /dev/null
+++ b/innobase/include/row0ins.h
@@ -0,0 +1,142 @@
+/******************************************************
+Insert into a table
+
+(c) 1996 Innobase Oy
+
+Created 4/20/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0ins_h
+#define row0ins_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "que0types.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "row0types.h"
+
+/*************************************************************************
+Creates an insert node struct. */
+
+ins_node_t*
+ins_node_create(
+/*============*/
+ /* out, own: insert node struct */
+ ulint ins_type, /* in: INS_VALUES, ... */
+ dict_table_t* table, /* in: table where to insert */
+ mem_heap_t* heap); /* in: mem heap where created */
+/*************************************************************************
+Sets a new row to insert for an INS_DIRECT node. This function is only used
+if we have constructed the row separately, which is a rare case; this
+function is quite slow. */
+
+void
+ins_node_set_new_row(
+/*=================*/
+ ins_node_t* node, /* in: insert node */
+ dtuple_t* row); /* in: new row (or first row) for the node */
+/*******************************************************************
+Tries to insert an index entry to an index. If the index is clustered
+and a record with the same unique key is found, the other record is
+necessarily marked deleted by a committed transaction, or a unique key
+violation error occurs. The delete marked record is then updated to an
+existing record, and we must write an undo log record on the delete
+marked record. If the index is secondary, and a record with exactly the
+same fields is found, the other record is necessarily marked deleted.
+It is then unmarked. Otherwise, the entry is just inserted to the index. */
+
+ulint
+row_ins_index_entry_low(
+/*====================*/
+ /* out: DB_SUCCESS, DB_LOCK_WAIT, DB_FAIL
+ if pessimistic retry needed, or error code */
+ ulint mode, /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+ depending on whether we wish optimistic or
+ pessimistic descent down the index tree */
+ dict_index_t* index, /* in: index */
+ dtuple_t* entry, /* in: index entry to insert */
+ que_thr_t* thr); /* in: query thread */
+/*******************************************************************
+Inserts an index entry to index. Tries first optimistic, then pessimistic
+descent down the tree. If the entry matches enough to a delete marked record,
+performs the insert by updating or delete unmarking the delete marked
+record. */
+
+ulint
+row_ins_index_entry(
+/*================*/
+ /* out: DB_SUCCESS, DB_LOCK_WAIT,
+ DB_DUPLICATE_KEY, or some other error code */
+ dict_index_t* index, /* in: index */
+ dtuple_t* entry, /* in: index entry to insert */
+ que_thr_t* thr); /* in: query thread */
+/***************************************************************
+Inserts a row to a table. */
+
+ulint
+row_ins(
+/*====*/
+ /* out: DB_SUCCESS if operation successfully
+ completed, else error code or DB_LOCK_WAIT */
+ ins_node_t* node, /* in: row insert node */
+ que_thr_t* thr); /* in: query thread */
+/***************************************************************
+Inserts a row to a table. This is a high-level function used in
+SQL execution graphs. */
+
+que_thr_t*
+row_ins_step(
+/*=========*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+
+/* Insert node structure */
+
+struct ins_node_struct{
+ que_common_t common; /* node type: QUE_NODE_INSERT */
+ ulint ins_type;/* INS_VALUES, INS_SEARCHED, or INS_DIRECT */
+ dtuple_t* row; /* row to insert */
+ dict_table_t* table; /* table where to insert */
+ sel_node_t* select; /* select in searched insert */
+ que_node_t* values_list;/* list of expressions to evaluate and
+ insert in an INS_VALUES insert */
+ ulint state; /* node execution state */
+ dict_index_t* index; /* NULL, or the next index where the index
+ entry should be inserted */
+ dtuple_t* entry; /* NULL, or entry to insert in the index;
+ after a successful insert of the entry,
+ this should be reset to NULL */
+ UT_LIST_BASE_NODE_T(dtuple_t)
+ entry_list;/* list of entries, one for each index */
+ byte* row_id_buf;/* buffer for the row id sys field in row */
+ dulint trx_id; /* trx id or the last trx which executed the
+ node */
+ byte* trx_id_buf;/* buffer for the trx id sys field in row */
+ mem_heap_t* entry_sys_heap;
+ /* memory heap used as auxiliary storage;
+ entry_list and sys fields are stored here;
+ if this is NULL, entry list should be created
+ and buffers for sys fields in row allocated */
+ ulint magic_n;
+};
+
+#define INS_NODE_MAGIC_N 15849075
+
+/* Insert node types */
+#define INS_SEARCHED 0 /* INSERT INTO ... SELECT ... */
+#define INS_VALUES 1 /* INSERT INTO ... VALUES ... */
+#define INS_DIRECT 2 /* this is for internal use in dict0crea:
+ insert the row directly */
+
+/* Node execution states */
+#define INS_NODE_SET_IX_LOCK 1 /* we should set an IX lock on table */
+#define INS_NODE_ALLOC_ROW_ID 2 /* row id should be allocated */
+#define INS_NODE_INSERT_ENTRIES 3 /* index entries should be built and
+ inserted */
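+
+/* Editor's note, not part of the original Innobase source: to the
+editor's understanding an insert traverses the states above in order:
+the IX table lock is set first, a row id is allocated only when the
+clustered index was generated on the row id, and the entries in
+entry_list are then built and inserted one index at a time, with 'index'
+and 'entry' in the node pointing to the next entry to insert. */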
+
+#ifndef UNIV_NONINL
+#include "row0ins.ic"
+#endif
+
+#endif
diff --git a/innobase/include/row0ins.ic b/innobase/include/row0ins.ic
new file mode 100644
index 00000000000..80a232d41ee
--- /dev/null
+++ b/innobase/include/row0ins.ic
@@ -0,0 +1,9 @@
+/******************************************************
+Insert into a table
+
+(c) 1996 Innobase Oy
+
+Created 4/20/1996 Heikki Tuuri
+*******************************************************/
+
+
diff --git a/innobase/include/row0mysql.h b/innobase/include/row0mysql.h
new file mode 100644
index 00000000000..ee631bc02dc
--- /dev/null
+++ b/innobase/include/row0mysql.h
@@ -0,0 +1,359 @@
+/******************************************************
+Interface between Innobase row operations and MySQL.
+Also contains create table and other data dictionary operations.
+
+(c) 2000 Innobase Oy
+
+Created 9/17/2000 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0mysql_h
+#define row0mysql_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "que0types.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "row0types.h"
+#include "btr0pcur.h"
+#include "trx0types.h"
+
+typedef struct row_prebuilt_struct row_prebuilt_t;
+
+/***********************************************************************
+Stores a variable-length field (like VARCHAR) length to dest, in the
+MySQL format. */
+UNIV_INLINE
+byte*
+row_mysql_store_var_len(
+/*====================*/
+ /* out: dest + 2 */
+ byte* dest, /* in: where to store */
+ ulint len); /* in: length, must fit in two bytes */
+/***********************************************************************
+Reads a MySQL format variable-length field (like VARCHAR) length and
+returns pointer to the field data. */
+UNIV_INLINE
+byte*
+row_mysql_read_var_ref(
+/*===================*/
+ /* out: field + 2 */
+ ulint* len, /* out: variable-length field length */
+ byte* field); /* in: field */
+/***********************************************************************
+Reads a MySQL format variable-length field (like VARCHAR) length and
+returns pointer to the field data. */
+
+byte*
+row_mysql_read_var_ref_noninline(
+/*=============================*/
+ /* out: field + 2 */
+ ulint* len, /* out: variable-length field length */
+ byte* field); /* in: field */
+/***********************************************************************
+Stores a reference to a BLOB in the MySQL format. */
+
+void
+row_mysql_store_blob_ref(
+/*=====================*/
+ byte* dest, /* in: where to store */
+ ulint col_len, /* in: dest buffer size: determines into
+ how many bytes the BLOB length is stored,
+ this may vary from 1 to 4 bytes */
+ byte* data, /* in: BLOB data */
+ ulint len); /* in: BLOB length */
+/***********************************************************************
+Reads a reference to a BLOB in the MySQL format. */
+
+byte*
+row_mysql_read_blob_ref(
+/*====================*/
+ /* out: pointer to BLOB data */
+ ulint* len, /* out: BLOB length */
+ byte* ref, /* in: BLOB reference in the MySQL format */
+ ulint col_len); /* in: BLOB reference length (not BLOB
+ length) */
+/******************************************************************
+Stores a non-SQL-NULL field given in the MySQL format in the Innobase
+format. */
+UNIV_INLINE
+void
+row_mysql_store_col_in_innobase_format(
+/*===================================*/
+ dfield_t* dfield, /* in/out: dfield */
+ byte* buf, /* in/out: buffer for the converted
+ value */
+ byte* mysql_data, /* in: MySQL column value, not
+ SQL NULL; NOTE that dfield may also
+ get a pointer to mysql_data,
+ therefore do not discard this as long
+ as dfield is used! */
+ ulint col_len, /* in: MySQL column length */
+ ulint type, /* in: data type */
+ ulint is_unsigned); /* in: != 0 if unsigned integer type */
+/********************************************************************
+Handles user errors and lock waits detected by the database engine. */
+
+ibool
+row_mysql_handle_errors(
+/*====================*/
+ /* out: TRUE if it was a lock wait and
+ we should continue running the query thread */
+ ulint* new_err,/* out: possible new error encountered in
+ rollback, or the old error which was
+ during the function entry */
+ trx_t* trx, /* in: transaction */
+ que_thr_t* thr, /* in: query thread */
+ trx_savept_t* savept);/* in: savepoint */
+/************************************************************************
+Create a prebuilt struct for a MySQL table handle. */
+
+row_prebuilt_t*
+row_create_prebuilt(
+/*================*/
+ /* out, own: a prebuilt struct */
+ dict_table_t* table); /* in: Innobase table handle */
+/************************************************************************
+Free a prebuilt struct for a MySQL table handle. */
+
+void
+row_prebuilt_free(
+/*==============*/
+ row_prebuilt_t* prebuilt); /* in, own: prebuilt struct */
+/*************************************************************************
+Updates the transaction pointers in query graphs stored in the prebuilt
+struct. */
+
+void
+row_update_prebuilt_trx(
+/*====================*/
+ /* out: prebuilt dtuple */
+ row_prebuilt_t* prebuilt, /* in: prebuilt struct in MySQL
+ handle */
+ trx_t* trx); /* in: transaction handle */
+/*************************************************************************
+Does an insert for MySQL. */
+
+int
+row_insert_for_mysql(
+/*=================*/
+ /* out: error code or DB_SUCCESS */
+ byte* mysql_rec, /* in: row in the MySQL format */
+ row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL
+ handle */
+/*************************************************************************
+Builds a dummy query graph used in selects. */
+
+void
+row_prebuild_sel_graph(
+/*===================*/
+ row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL
+ handle */
+/*************************************************************************
+Gets pointer to a prebuilt update vector used in updates. If the update
+graph has not yet been built in the prebuilt struct, then this function
+first builds it. */
+
+upd_t*
+row_get_prebuilt_update_vector(
+/*===========================*/
+ /* out: prebuilt update vector */
+ row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL
+ handle */
+/*************************************************************************
+Checks if a table is such that we automatically created a clustered
+index on it (on row id). */
+
+ibool
+row_table_got_default_clust_index(
+/*==============================*/
+ dict_table_t* table);
+/*************************************************************************
+Does an update or delete of a row for MySQL. */
+
+int
+row_update_for_mysql(
+/*=================*/
+ /* out: error code or DB_SUCCESS */
+ byte* mysql_rec, /* in: the row to be updated, in
+ the MySQL format */
+ row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL
+ handle */
+/*************************************************************************
+Does a table creation operation for MySQL. */
+
+int
+row_create_table_for_mysql(
+/*=======================*/
+ /* out: error code or DB_SUCCESS */
+ dict_table_t* table, /* in: table definition */
+ trx_t* trx); /* in: transaction handle */
+/*************************************************************************
+Does an index creation operation for MySQL. TODO: currently failure
+to create an index results in dropping the whole table! This is no problem
+currently as all indexes must be created at the same time as the table. */
+
+int
+row_create_index_for_mysql(
+/*=======================*/
+ /* out: error number or DB_SUCCESS */
+	dict_index_t*	index,		/* in: index definition */
+ trx_t* trx); /* in: transaction handle */
+/*************************************************************************
+Drops a table for MySQL. */
+
+int
+row_drop_table_for_mysql(
+/*=====================*/
+ /* out: error code or DB_SUCCESS */
+ char* name, /* in: table name */
+ trx_t* trx, /* in: transaction handle */
+ ibool has_dict_mutex);/* in: TRUE if the caller already owns the
+ dictionary system mutex */
+/*************************************************************************
+Renames a table for MySQL. */
+
+int
+row_rename_table_for_mysql(
+/*=======================*/
+ /* out: error code or DB_SUCCESS */
+ char* old_name, /* in: old table name */
+ char* new_name, /* in: new table name */
+ trx_t* trx); /* in: transaction handle */
+
+/* A struct describing a place for an individual column in the MySQL
+row format which is presented to the table handler in ha_innobase.
+This template struct is used to speed up row transformations between
+Innobase and MySQL. */
+
+typedef struct mysql_row_templ_struct mysql_row_templ_t;
+struct mysql_row_templ_struct {
+ ulint col_no; /* column number of the column */
+ ulint rec_field_no; /* field number of the column in an
+ Innobase record in the current index;
+ not defined if template_type is
+ ROW_MYSQL_WHOLE_ROW */
+ ulint mysql_col_offset; /* offset of the column in the MySQL
+ row format */
+ ulint mysql_col_len; /* length of the column in the MySQL
+ row format */
+ ulint mysql_null_byte_offset; /* MySQL NULL bit byte offset in a
+ MySQL record */
+ ulint mysql_null_bit_mask; /* bit mask to get the NULL bit,
+ zero if column cannot be NULL */
+ ulint type; /* column type in Innobase mtype
+ numbers DATA_CHAR... */
+ ulint is_unsigned; /* if a column type is an integer
+ type and this field is != 0, then
+ it is an unsigned integer type */
+};
+
+#define MYSQL_FETCH_CACHE_SIZE 8
+/* After fetching this many rows, we start caching them in fetch_cache */
+#define MYSQL_FETCH_CACHE_THRESHOLD 4
+
+
+/* A struct for (sometimes lazily) prebuilt structures in an Innobase table
+handle used within MySQL; these are used to save CPU time. */
+
+struct row_prebuilt_struct {
+ dict_table_t* table; /* Innobase table handle */
+ trx_t* trx; /* current transaction handle */
+ ibool sql_stat_start; /* TRUE when we start processing of
+ an SQL statement: we may have to set
+ an intention lock on the table,
+ create a consistent read view etc. */
+ ibool clust_index_was_generated;
+ /* if the user did not define a
+ primary key in MySQL, then Innobase
+ automatically generated a clustered
+ index where the ordering column is
+ the row id: in this case this flag
+ is set to TRUE */
+ dict_index_t* index; /* current index for a search, if any */
+ ulint template_type; /* ROW_MYSQL_WHOLE_ROW,
+ ROW_MYSQL_REC_FIELDS or
+ ROW_MYSQL_NO_TEMPLATE */
+ ulint n_template; /* number of elements in the
+ template */
+ ulint null_bitmap_len;/* number of bytes in the SQL NULL
+ bitmap at the start of a row in the
+ MySQL format */
+ ibool need_to_access_clustered; /* if we are fetching
+ columns through a secondary index
+ and at least one column is not in
+ the secondary index, then this is
+ set to TRUE */
+ ibool templ_contains_blob;/* TRUE if the template contains
+ BLOB column(s) */
+ mysql_row_templ_t* mysql_template;/* template used to transform
+ rows fast between MySQL and Innobase
+ formats; memory for this template
+ is not allocated from 'heap' */
+ mem_heap_t* heap; /* memory heap from which
+ these auxiliary structures are
+ allocated when needed */
+ ins_node_t* ins_node; /* Innobase SQL insert node
+ used to perform inserts
+ to the table */
+ byte* ins_upd_rec_buff;/* buffer for storing data converted
+ to the Innobase format from the MySQL
+ format */
+ ibool in_update_remember_pos;
+ /* if an update is processed, then if
+ this flag is set to TRUE, it means
+ that the stored cursor position in
+ SELECT is the right position also
+ for the update: we can just restore
+ the cursor and save CPU time */
+ upd_node_t* upd_node; /* Innobase SQL update node used
+ to perform updates and deletes */
+ que_fork_t* ins_graph; /* Innobase SQL query graph used
+ in inserts */
+ que_fork_t* upd_graph; /* Innobase SQL query graph used
+ in updates or deletes */
+ btr_pcur_t* pcur; /* persistent cursor used in selects
+ and updates */
+ btr_pcur_t* clust_pcur; /* persistent cursor used in
+ some selects and updates */
+ que_fork_t* sel_graph; /* dummy query graph used in
+ selects */
+ dtuple_t* search_tuple; /* prebuilt dtuple used in selects */
+ byte row_id[DATA_ROW_ID_LEN];
+ /* if the clustered index was generated,
+ the row id of the last row fetched is
+ stored here */
+ dtuple_t* clust_ref; /* prebuilt dtuple used in
+ sel/upd/del */
+ ulint select_lock_type;/* LOCK_NONE, LOCK_S, or LOCK_X */
+ ulint mysql_row_len; /* length in bytes of a row in the
+ MySQL format */
+ ulint n_rows_fetched; /* number of rows fetched after
+ positioning the current cursor */
+ ulint fetch_direction;/* ROW_SEL_NEXT or ROW_SEL_PREV */
+ byte* fetch_cache[MYSQL_FETCH_CACHE_SIZE];
+ /* a cache for fetched rows if we
+ fetch many rows from the same cursor:
+ it saves CPU time to fetch them in a
+ batch; we reserve mysql_row_len
+ bytes for each such row */
+ ulint fetch_cache_first;/* position of the first not yet
+ fetched row in fetch_cache */
+ ulint n_fetch_cached; /* number of not yet fetched rows
+ in fetch_cache */
+ mem_heap_t* blob_heap; /* in SELECTS BLOB fields are copied
+ to this heap */
+ mem_heap_t* old_vers_heap; /* memory heap where a previous
+ version is built in consistent read */
+};
+
+#define ROW_MYSQL_WHOLE_ROW 0
+#define ROW_MYSQL_REC_FIELDS 1
+#define ROW_MYSQL_NO_TEMPLATE 2
+
+#ifndef UNIV_NONINL
+#include "row0mysql.ic"
+#endif
+
+#endif
diff --git a/innobase/include/row0mysql.ic b/innobase/include/row0mysql.ic
new file mode 100644
index 00000000000..773e25a87ef
--- /dev/null
+++ b/innobase/include/row0mysql.ic
@@ -0,0 +1,97 @@
+/******************************************************
+MySQL interface for Innobase
+
+(C) 2001 Innobase Oy
+
+Created 1/23/2001 Heikki Tuuri
+*******************************************************/
+
+/***********************************************************************
+Stores a variable-length field (like VARCHAR) length to dest, in the
+MySQL format. No real var implemented in MySQL yet! */
+UNIV_INLINE
+byte*
+row_mysql_store_var_len(
+/*====================*/
+ /* out: dest + 2 */
+ byte* dest, /* in: where to store */
+ ulint len) /* in: length, must fit in two bytes */
+{
+ ut_ad(len < 256 * 256);
+/*
+ mach_write_to_2_little_endian(dest, len);
+
+ return(dest + 2);
+*/
+ return(dest); /* No real var implemented in MySQL yet! */
+}
+
+/***********************************************************************
+Reads a MySQL format variable-length field (like VARCHAR) length and
+returns pointer to the field data. No real var implemented in MySQL yet! */
+UNIV_INLINE
+byte*
+row_mysql_read_var_ref(
+/*===================*/
+ /* out: field + 2 */
+ ulint* len, /* out: variable-length field length; does not work
+ yet! */
+ byte* field) /* in: field */
+{
+/*
+ *len = mach_read_from_2_little_endian(field);
+
+ return(field + 2);
+*/
+ return(field); /* No real var implemented in MySQL yet! */
+}
+
+/******************************************************************
+Stores a non-SQL-NULL field given in the MySQL format in the Innobase
+format. */
+UNIV_INLINE
+void
+row_mysql_store_col_in_innobase_format(
+/*===================================*/
+ dfield_t* dfield, /* in/out: dfield */
+ byte* buf, /* in/out: buffer for the converted
+ value */
+ byte* mysql_data, /* in: MySQL column value, not
+ SQL NULL; NOTE that dfield may also
+ get a pointer to mysql_data,
+ therefore do not discard this as long
+ as dfield is used! */
+ ulint col_len, /* in: MySQL column length */
+ ulint type, /* in: data type */
+ ulint is_unsigned) /* in: != 0 if unsigned integer type */
+{
+ byte* ptr = mysql_data;
+
+ if (type == DATA_INT) {
+ /* Store integer data in Innobase in a big-endian format,
+ sign bit negated */
+
+ ptr = buf + col_len;
+
+ for (;;) {
+ ptr--;
+ *ptr = *mysql_data;
+ if (ptr == buf) {
+ break;
+ }
+ mysql_data++;
+ }
+
+ if (!is_unsigned) {
+ *ptr = *ptr ^ 128;
+ }
+ } else if (type == DATA_VARCHAR || type == DATA_VARMYSQL
+ || type == DATA_BINARY) {
+ ptr = row_mysql_read_var_ref(&col_len, mysql_data);
+
+ } else if (type == DATA_BLOB) {
+ ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len);
+ }
+
+ dfield_set_data(dfield, ptr, col_len);
+}
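+
+/* Editor's note: a worked example, not part of the original Innobase
+source. Assuming MySQL stores an integer column low byte first, the loop
+above reverses the bytes, and the sign bit of a signed column is then
+negated, so that values compare correctly as unsigned big-endian byte
+strings:
+
+	MySQL bytes (col_len 4)		stored in Innobase
+	-1:	FF FF FF FF		7F FF FF FF
+	 0:	00 00 00 00		80 00 00 00
+	 1:	01 00 00 00		80 00 00 01
+
+For an unsigned column the top bit is left as it is. */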
diff --git a/innobase/include/row0purge.h b/innobase/include/row0purge.h
new file mode 100644
index 00000000000..4c863441442
--- /dev/null
+++ b/innobase/include/row0purge.h
@@ -0,0 +1,80 @@
+/******************************************************
+Purge obsolete records
+
+(c) 1997 Innobase Oy
+
+Created 3/14/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0purge_h
+#define row0purge_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "btr0types.h"
+#include "btr0pcur.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+#include "row0types.h"
+
+/************************************************************************
+Creates a purge node to a query graph. */
+
+purge_node_t*
+row_purge_node_create(
+/*==================*/
+ /* out, own: purge node */
+ que_thr_t* parent, /* in: parent node, i.e., a thr node */
+ mem_heap_t* heap); /* in: memory heap where created */
+/***************************************************************
+Does the purge operation for a single undo log record. This is a high-level
+function used in an SQL execution graph. */
+
+que_thr_t*
+row_purge_step(
+/*===========*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+
+/* Purge node structure */
+
+struct purge_node_struct{
+ que_common_t common; /* node type: QUE_NODE_PURGE */
+ /*----------------------*/
+ /* Local storage for this graph node */
+ dulint roll_ptr;/* roll pointer to undo log record */
+ trx_undo_rec_t* undo_rec;/* undo log record */
+ trx_undo_inf_t* reservation;/* reservation for the undo log record in
+ the purge array */
+ dulint undo_no;/* undo number of the record */
+ ulint rec_type;/* undo log record type: TRX_UNDO_INSERT_REC,
+ ... */
+ btr_pcur_t pcur; /* persistent cursor used in searching the
+ clustered index record */
+ ibool found_clust;/* TRUE if the clustered index record
+ determined by ref was found in the clustered
+ index, and we were able to position pcur on
+ it */
+ dict_table_t* table; /* table where purge is done; NOTE that the
+ table has to be released explicitly with
+ dict_table_release */
+ ulint cmpl_info;/* compiler analysis info of an update */
+ upd_t* update; /* update vector for a clustered index record */
+ dtuple_t* ref; /* NULL, or row reference to the next row to
+ handle */
+ dtuple_t* row; /* NULL, or a copy (also fields copied to
+ heap) of the indexed fields of the row to
+ handle */
+ dict_index_t* index; /* NULL, or the next index whose record should
+ be handled */
+ mem_heap_t* heap; /* memory heap used as auxiliary storage for
+ row; this must be emptied after a successful
+ purge of a row */
+};
+
+#ifndef UNIV_NONINL
+#include "row0purge.ic"
+#endif
+
+#endif
diff --git a/innobase/include/row0purge.ic b/innobase/include/row0purge.ic
new file mode 100644
index 00000000000..50aabf0bc1b
--- /dev/null
+++ b/innobase/include/row0purge.ic
@@ -0,0 +1,8 @@
+
+/******************************************************
+Purge obsolete records
+
+(c) 1997 Innobase Oy
+
+Created 3/14/1997 Heikki Tuuri
+*******************************************************/
diff --git a/innobase/include/row0row.h b/innobase/include/row0row.h
new file mode 100644
index 00000000000..fb1e1b01ee3
--- /dev/null
+++ b/innobase/include/row0row.h
@@ -0,0 +1,266 @@
+/******************************************************
+General row routines
+
+(c) 1996 Innobase Oy
+
+Created 4/20/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0row_h
+#define row0row_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+#include "mtr0mtr.h"
+#include "rem0types.h"
+#include "read0types.h"
+#include "btr0types.h"
+
+/*************************************************************************
+Reads the trx id field from a clustered index record. */
+UNIV_INLINE
+dulint
+row_get_rec_trx_id(
+/*===============*/
+ /* out: value of the field */
+ rec_t* rec, /* in: record */
+ dict_index_t* index); /* in: clustered index */
+/*************************************************************************
+Reads the roll pointer field from a clustered index record. */
+UNIV_INLINE
+dulint
+row_get_rec_roll_ptr(
+/*=================*/
+ /* out: value of the field */
+ rec_t* rec, /* in: record */
+ dict_index_t* index); /* in: clustered index */
+/*************************************************************************
+Writes the trx id field to a clustered index record. */
+UNIV_INLINE
+void
+row_set_rec_trx_id(
+/*===============*/
+ rec_t* rec, /* in: record */
+ dict_index_t* index, /* in: clustered index */
+ dulint trx_id); /* in: value of the field */
+/*************************************************************************
+Sets the roll pointer field in a clustered index record. */
+UNIV_INLINE
+void
+row_set_rec_roll_ptr(
+/*=================*/
+ rec_t* rec, /* in: record */
+ dict_index_t* index, /* in: clustered index */
+ dulint roll_ptr);/* in: value of the field */
+/*********************************************************************
+When an insert to a table is performed, this function builds the entry which
+has to be inserted to an index on the table. */
+
+dtuple_t*
+row_build_index_entry(
+/*==================*/
+ /* out: index entry which should be inserted */
+ dtuple_t* row, /* in: row which should be inserted to the
+ table */
+ dict_index_t* index, /* in: index on the table */
+ mem_heap_t* heap); /* in: memory heap from which the memory for
+ the index entry is allocated */
+/*********************************************************************
+Builds an index entry from a row. */
+
+void
+row_build_index_entry_to_tuple(
+/*===========================*/
+ dtuple_t* entry, /* in/out: index entry; the dtuple must have
+ enough fields for the index! */
+ dtuple_t* row, /* in: row */
+ dict_index_t* index); /* in: index on the table */
+/***********************************************************************
+An inverse function to row_build_index_entry. Builds a row from a
+record in a clustered index. */
+
+dtuple_t*
+row_build(
+/*======*/
+ /* out, own: row built; see the NOTE below! */
+ ulint type, /* in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
+ the former copies also the data fields to
+ heap as the latter only places pointers to
+ data fields on the index page, and thus is
+ more efficient */
+ dict_index_t* index, /* in: clustered index */
+ rec_t* rec, /* in: record in the clustered index;
+ NOTE: in the case ROW_COPY_POINTERS
+ the data fields in the row will point
+ directly into this record, therefore,
+ the buffer page of this record must be
+ at least s-latched and the latch held
+ as long as the row dtuple is used! */
+ mem_heap_t* heap); /* in: memory heap from which the memory
+ needed is allocated */
+/***********************************************************************
+An inverse function to row_build_index_entry. Builds a row from a
+record in a clustered index. */
+
+void
+row_build_to_tuple(
+/*===============*/
+ dtuple_t* row, /* in/out: row built; see the NOTE below! */
+ dict_index_t* index, /* in: clustered index */
+ rec_t* rec); /* in: record in the clustered index;
+ NOTE: the data fields in the row will point
+ directly into this record, therefore,
+ the buffer page of this record must be
+ at least s-latched and the latch held
+ as long as the row dtuple is used! */
+/***********************************************************************
+Converts an index record to a typed data tuple. */
+
+dtuple_t*
+row_rec_to_index_entry(
+/*===================*/
+ /* out, own: index entry built; see the
+ NOTE below! */
+ ulint type, /* in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
+ the former copies also the data fields to
+ heap as the latter only places pointers to
+ data fields on the index page */
+ dict_index_t* index, /* in: index */
+ rec_t* rec, /* in: record in the index;
+ NOTE: in the case ROW_COPY_POINTERS
+ the data fields in the row will point
+ directly into this record, therefore,
+ the buffer page of this record must be
+ at least s-latched and the latch held
+ as long as the dtuple is used! */
+ mem_heap_t* heap); /* in: memory heap from which the memory
+ needed is allocated */
+/***********************************************************************
+Builds from a secondary index record a row reference with which we can
+search the clustered index record. */
+
+dtuple_t*
+row_build_row_ref(
+/*==============*/
+ /* out, own: row reference built; see the
+ NOTE below! */
+ ulint type, /* in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
+ the former copies also the data fields to
+ heap, whereas the latter only places pointers
+ to data fields on the index page */
+ dict_index_t* index, /* in: index */
+ rec_t* rec, /* in: record in the index;
+ NOTE: in the case ROW_COPY_POINTERS
+ the data fields in the row will point
+ directly into this record, therefore,
+ the buffer page of this record must be
+ at least s-latched and the latch held
+ as long as the row reference is used! */
+ mem_heap_t* heap); /* in: memory heap from which the memory
+ needed is allocated */
+/***********************************************************************
+Builds from a secondary index record a row reference with which we can
+search the clustered index record. */
+
+void
+row_build_row_ref_in_tuple(
+/*=======================*/
+ dtuple_t* ref, /* in/out: row reference built; see the
+ NOTE below! */
+ dict_index_t* index, /* in: index */
+ rec_t* rec); /* in: record in the index;
+ NOTE: the data fields in ref will point
+ directly into this record, therefore,
+ the buffer page of this record must be
+ at least s-latched and the latch held
+ as long as the row reference is used! */
+/***********************************************************************
+From a row build a row reference with which we can search the clustered
+index record. */
+
+void
+row_build_row_ref_from_row(
+/*=======================*/
+ dtuple_t* ref, /* in/out: row reference built; see the
+ NOTE below! ref must have the right number
+ of fields! */
+ dict_table_t* table, /* in: table */
+ dtuple_t* row); /* in: row
+ NOTE: the data fields in ref will point
+ directly into data of this row */
+/***********************************************************************
+Builds from a secondary index record a row reference with which we can
+search the clustered index record. */
+UNIV_INLINE
+void
+row_build_row_ref_fast(
+/*===================*/
+ dtuple_t* ref, /* in: typed data tuple where the reference
+ is built */
+ ulint* map, /* in: array of field numbers in rec telling
+ how ref should be built from the fields of
+ rec */
+ rec_t* rec); /* in: record in the index; must be preserved
+ while ref is used, as we do not copy field
+ values to heap */
+/*******************************************************************
+Searches the clustered index record for a row, if we have the row
+reference. */
+
+ibool
+row_search_on_row_ref(
+/*==================*/
+ /* out: TRUE if found */
+ btr_pcur_t* pcur, /* in/out: persistent cursor, which must
+ be closed by the caller */
+ ulint mode, /* in: BTR_MODIFY_LEAF, ... */
+ dict_table_t* table, /* in: table */
+ dtuple_t* ref, /* in: row reference */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************************
+Fetches the clustered index record for a secondary index record. The latches
+on the secondary index record are preserved. */
+
+rec_t*
+row_get_clust_rec(
+/*==============*/
+ /* out: record or NULL, if no record found */
+ ulint mode, /* in: BTR_MODIFY_LEAF, ... */
+ rec_t* rec, /* in: record in a secondary index */
+ dict_index_t* index, /* in: secondary index */
+ dict_index_t** clust_index,/* out: clustered index */
+ mtr_t* mtr); /* in: mtr */
+/*******************************************************************
+Searches an index record. */
+
+ibool
+row_search_index_entry(
+/*===================*/
+ /* out: TRUE if found */
+ dict_index_t* index, /* in: index */
+ dtuple_t* entry, /* in: index entry */
+ ulint mode, /* in: BTR_MODIFY_LEAF, ... */
+ btr_pcur_t* pcur, /* in/out: persistent cursor, which must
+ be closed by the caller */
+ mtr_t* mtr); /* in: mtr */
+
+
+#define ROW_COPY_DATA 1
+#define ROW_COPY_POINTERS 2
+
+/* The allowed latching order of index records is the following:
+(1) a secondary index record ->
+(2) the clustered index record ->
+(3) rollback segment data for the clustered index record.
+
+No new latches may be obtained while the kernel mutex is reserved.
+However, the kernel mutex can be reserved while latches are owned. */
+
+#ifndef UNIV_NONINL
+#include "row0row.ic"
+#endif
+
+#endif
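As a rough illustration of how the declarations above are meant to be combined (respecting the latching order note at the end of the header), here is a hedged sketch only; it assumes the surrounding InnoDB build, that the caller has started the mini-transaction and s-latched the secondary index page, and the helper name and the BTR_SEARCH_LEAF latch mode are illustrative assumptions, not part of this patch.

#include "row0row.h"

/* Hedged sketch only: look up the clustered index record that corresponds
to a secondary index record, following the latching order (1) secondary
record -> (2) clustered record described above. */
static rec_t*
example_fetch_clust_rec(
	rec_t*		sec_rec,	/* in: record in a secondary index,
					s-latched by the caller */
	dict_index_t*	sec_index,	/* in: the secondary index */
	mtr_t*		mtr)		/* in: mtr started by the caller */
{
	dict_index_t*	clust_index;

	/* Returns NULL if no matching clustered index record was found */
	return(row_get_clust_rec(BTR_SEARCH_LEAF, sec_rec, sec_index,
						&clust_index, mtr));
}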
diff --git a/innobase/include/row0row.ic b/innobase/include/row0row.ic
new file mode 100644
index 00000000000..8e5121f5a96
--- /dev/null
+++ b/innobase/include/row0row.ic
@@ -0,0 +1,165 @@
+/******************************************************
+General row routines
+
+(c) 1996 Innobase Oy
+
+Created 4/20/1996 Heikki Tuuri
+*******************************************************/
+
+#include "dict0dict.h"
+#include "rem0rec.h"
+#include "trx0undo.h"
+
+/*************************************************************************
+Reads the trx id or roll ptr field from a clustered index record: this function
+is slower than the specialized inline functions. */
+
+dulint
+row_get_rec_sys_field(
+/*==================*/
+ /* out: value of the field */
+ ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
+ rec_t* rec, /* in: record */
+ dict_index_t* index); /* in: clustered index */
+/*************************************************************************
+Sets the trx id or roll ptr field in a clustered index record: this function
+is slower than the specialized inline functions. */
+
+void
+row_set_rec_sys_field(
+/*==================*/
+ ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
+ rec_t* rec, /* in: record */
+ dict_index_t* index, /* in: clustered index */
+ dulint val); /* in: value to set */
+
+/*************************************************************************
+Reads the trx id field from a clustered index record. */
+UNIV_INLINE
+dulint
+row_get_rec_trx_id(
+/*===============*/
+ /* out: value of the field */
+ rec_t* rec, /* in: record */
+ dict_index_t* index) /* in: clustered index */
+{
+ ulint offset;
+
+ ut_ad(index->type & DICT_CLUSTERED);
+
+ offset = index->trx_id_offset;
+
+ if (offset) {
+ return(trx_read_trx_id(rec + offset));
+ } else {
+ return(row_get_rec_sys_field(DATA_TRX_ID, rec, index));
+ }
+}
+
+/*************************************************************************
+Reads the roll pointer field from a clustered index record. */
+UNIV_INLINE
+dulint
+row_get_rec_roll_ptr(
+/*=================*/
+ /* out: value of the field */
+ rec_t* rec, /* in: record */
+ dict_index_t* index) /* in: clustered index */
+{
+ ulint offset;
+
+ ut_ad(index->type & DICT_CLUSTERED);
+
+ offset = index->trx_id_offset;
+
+ if (offset) {
+ return(trx_read_roll_ptr(rec + offset + DATA_TRX_ID_LEN));
+ } else {
+ return(row_get_rec_sys_field(DATA_ROLL_PTR, rec, index));
+ }
+}
+
+/*************************************************************************
+Writes the trx id field to a clustered index record. */
+UNIV_INLINE
+void
+row_set_rec_trx_id(
+/*===============*/
+ rec_t* rec, /* in: record */
+ dict_index_t* index, /* in: clustered index */
+ dulint trx_id) /* in: value of the field */
+{
+ ulint offset;
+
+ ut_ad(index->type & DICT_CLUSTERED);
+
+ offset = index->trx_id_offset;
+
+ if (offset) {
+ trx_write_trx_id(rec + offset, trx_id);
+ } else {
+ row_set_rec_sys_field(DATA_TRX_ID, rec, index, trx_id);
+ }
+}
+
+/*************************************************************************
+Sets the roll pointer field in a clustered index record. */
+UNIV_INLINE
+void
+row_set_rec_roll_ptr(
+/*=================*/
+ rec_t* rec, /* in: record */
+ dict_index_t* index, /* in: clustered index */
+ dulint roll_ptr)/* in: value of the field */
+{
+ ulint offset;
+
+ ut_ad(index->type & DICT_CLUSTERED);
+
+ offset = index->trx_id_offset;
+
+ if (offset) {
+ trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN, roll_ptr);
+ } else {
+ row_set_rec_sys_field(DATA_ROLL_PTR, rec, index, roll_ptr);
+ }
+}
+
+/***********************************************************************
+Builds from a secondary index record a row reference with which we can
+search the clustered index record. */
+UNIV_INLINE
+void
+row_build_row_ref_fast(
+/*===================*/
+ dtuple_t* ref, /* in: typed data tuple where the reference
+ is built */
+ ulint* map, /* in: array of field numbers in rec telling
+ how ref should be built from the fields of
+ rec */
+ rec_t* rec) /* in: record in the index; must be preserved
+ while ref is used, as we do not copy field
+ values to heap */
+{
+ dfield_t* dfield;
+ byte* field;
+ ulint len;
+ ulint ref_len;
+ ulint field_no;
+ ulint i;
+
+ ref_len = dtuple_get_n_fields(ref);
+
+ for (i = 0; i < ref_len; i++) {
+ dfield = dtuple_get_nth_field(ref, i);
+
+ field_no = *(map + i);
+
+ if (field_no != ULINT_UNDEFINED) {
+
+ field = rec_get_nth_field(rec, field_no, &len);
+ dfield_set_data(dfield, field, len);
+ }
+ }
+}
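A hedged usage sketch for row_build_row_ref_fast() above: the ref tuple is assumed to have been created elsewhere with the right field types, the two-field map and the helper name are illustrative, and rec must stay latched while ref is used since no data is copied.

#include "row0row.h"

/* Hedged sketch only: build a row reference from a secondary index record
without copying field data. map[i] gives the field number in rec for the
nth reference field; ULINT_UNDEFINED leaves that field untouched. */
static void
example_build_ref(
	dtuple_t*	ref,	/* in/out: typed tuple with 2 fields */
	rec_t*		rec)	/* in: secondary index record, latched */
{
	static ulint	map[2] = {1, ULINT_UNDEFINED};

	row_build_row_ref_fast(ref, map, rec);
}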
diff --git a/innobase/include/row0sel.h b/innobase/include/row0sel.h
new file mode 100644
index 00000000000..a64d3f8e425
--- /dev/null
+++ b/innobase/include/row0sel.h
@@ -0,0 +1,330 @@
+/******************************************************
+Select
+
+(c) 1997 Innobase Oy
+
+Created 12/19/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0sel_h
+#define row0sel_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "que0types.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "row0types.h"
+#include "que0types.h"
+#include "pars0sym.h"
+#include "btr0pcur.h"
+#include "read0read.h"
+#include "row0mysql.h"
+
+/*************************************************************************
+Creates a select node struct. */
+
+sel_node_t*
+sel_node_create(
+/*============*/
+ /* out, own: select node struct */
+ mem_heap_t* heap); /* in: memory heap where created */
+/*************************************************************************
+Frees the memory private to a select node when a query graph is freed,
+does not free the heap where the node was originally created. */
+
+void
+sel_node_free_private(
+/*==================*/
+ sel_node_t* node); /* in: select node struct */
+/*************************************************************************
+Frees a prefetch buffer for a column, including the dynamically allocated
+memory for data stored there. */
+
+void
+sel_col_prefetch_buf_free(
+/*======================*/
+ sel_buf_t* prefetch_buf); /* in, own: prefetch buffer */
+/*************************************************************************
+Gets the plan node for the nth table in a join. */
+UNIV_INLINE
+plan_t*
+sel_node_get_nth_plan(
+/*==================*/
+ sel_node_t* node,
+ ulint i);
+/**************************************************************************
+Performs a select step. This is a high-level function used in SQL execution
+graphs. */
+
+que_thr_t*
+row_sel_step(
+/*=========*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+/**************************************************************************
+Performs an execution step of an open or close cursor statement node. */
+UNIV_INLINE
+que_thr_t*
+open_step(
+/*======*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+/**************************************************************************
+Performs a fetch for a cursor. */
+
+que_thr_t*
+fetch_step(
+/*=======*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+/***************************************************************
+Prints a row in a select result. */
+
+que_thr_t*
+row_printf_step(
+/*============*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+/********************************************************************
+Converts a key value stored in MySQL format to an Innobase dtuple.
+The last field of the key value may be just a prefix of a fixed length
+field: hence the parameter key_len. */
+
+void
+row_sel_convert_mysql_key_to_innobase(
+/*==================================*/
+ dtuple_t* tuple, /* in: tuple where to build;
+ NOTE: we assume that the type info
+ in the tuple is already according
+ to index! */
+ byte* buf, /* in: buffer to use in field
+ conversions */
+ dict_index_t* index, /* in: index of the key value */
+ byte* key_ptr, /* in: MySQL key value */
+ ulint key_len); /* in: MySQL key value length */
+/************************************************************************
+Searches for rows in the database. This is used in the interface to
+MySQL. This function opens a cursor, and also implements fetch next
+and fetch prev. NOTE that if we do a search with a full key value
+from a unique index (ROW_SEL_EXACT), then we will not store the cursor
+position, and fetch next or fetch prev must not be tried on the cursor! */
+
+ulint
+row_search_for_mysql(
+/*=================*/
+ /* out: DB_SUCCESS,
+ DB_RECORD_NOT_FOUND,
+ DB_END_OF_INDEX, or DB_DEADLOCK */
+ byte* buf, /* in/out: buffer for the fetched
+ row in the MySQL format */
+ ulint mode, /* in: search mode PAGE_CUR_L, ... */
+ row_prebuilt_t* prebuilt, /* in: prebuilt struct for the
+ table handle; this contains the info
+ of search_tuple, index; if search
+ tuple contains 0 fields then we
+ position the cursor at the start or
+ the end of the index, depending on
+ 'mode' */
+ ulint match_mode, /* in: 0 or ROW_SEL_EXACT or
+ ROW_SEL_EXACT_PREFIX */
+ ulint direction); /* in: 0 or ROW_SEL_NEXT or
+ ROW_SEL_PREV; NOTE: if this is != 0,
+ then prebuilt must have a pcur
+ with stored position! In opening of a
+ cursor 'direction' should be 0. */
+
+
+/* A structure for caching column values for prefetched rows */
+struct sel_buf_struct{
+ byte* data; /* data, or NULL; if not NULL, this field
+ has allocated memory which must be explicitly
+ freed; can be != NULL even when len is
+ UNIV_SQL_NULL */
+ ulint len; /* data length or UNIV_SQL_NULL */
+ ulint val_buf_size;
+ /* size of memory buffer allocated for data:
+ this can be more than len; this is defined
+ when data != NULL */
+};
+
+struct plan_struct{
+ dict_table_t* table; /* table struct in the dictionary
+ cache */
+ dict_index_t* index; /* table index used in the search */
+ btr_pcur_t pcur; /* persistent cursor used to search
+ the index */
+ ibool asc; /* TRUE if cursor traveling upwards */
+ ibool pcur_is_open; /* TRUE if pcur has been positioned
+ and we can try to fetch new rows */
+ ibool cursor_at_end; /* TRUE if the cursor is open but
+ we know that there are no more
+ qualifying rows left to retrieve from
+ the index tree; NOTE though, that
+ there may still be unprocessed rows in
+ the prefetch stack; always FALSE when
+ pcur_is_open is FALSE */
+ ibool stored_cursor_rec_processed;
+ /* TRUE if the pcur position has been
+ stored and the record it is positioned
+ on has already been processed */
+ que_node_t** tuple_exps; /* array of expressions which are used
+ to calculate the field values in the
+ search tuple: there is one expression
+ for each field in the search tuple */
+ dtuple_t* tuple; /* search tuple */
+ ulint mode; /* search mode: PAGE_CUR_G, ... */
+ ulint n_exact_match; /* number of first fields in the search
+ tuple which must be exactly matched */
+ ibool unique_search; /* TRUE if we are searching an
+ index record with a unique key */
+ ulint n_rows_fetched; /* number of rows fetched using pcur
+ after it was opened */
+ ulint n_rows_prefetched;/* number of prefetched rows cached
+ for fetch: fetching several rows in
+ the same mtr saves CPU time */
+ ulint first_prefetched;/* index of the first cached row in
+ select buffer arrays for each column */
+ ibool no_prefetch; /* no prefetch for this table */
+ ibool mixed_index; /* TRUE if index is a clustered index
+ in a mixed cluster */
+ sym_node_list_t columns; /* symbol table nodes for the columns
+ to retrieve from the table */
+ UT_LIST_BASE_NODE_T(func_node_t)
+ end_conds; /* conditions which determine the
+ fetch limit of the index segment we
+ have to look at: when one of these
+ fails, the result set has been
+ exhausted for the cursor in this
+ index; these conditions are normalized
+ so that in a comparison the column
+ for this table is the first argument */
+ UT_LIST_BASE_NODE_T(func_node_t)
+ other_conds; /* the rest of search conditions we can
+ test at this table in a join */
+ ibool must_get_clust; /* TRUE if index is a non-clustered
+ index and we must also fetch the
+ clustered index record; this is the
+ case if the non-clustered record does
+ not contain all the needed columns, or
+ if this is a single-table explicit
+ cursor, or a searched update or
+ delete */
+ ulint* clust_map; /* map telling how clust_ref is built
+ from the fields of a non-clustered
+ record */
+ dtuple_t* clust_ref; /* the reference to the clustered
+ index entry is built here if index is
+ a non-clustered index */
+ btr_pcur_t clust_pcur; /* if index is non-clustered, we use
+ this pcur to search the clustered
+ index */
+ mem_heap_t* old_vers_heap; /* memory heap used in building an old
+ version of a row, or NULL */
+};
+
+struct sel_node_struct{
+ que_common_t common; /* node type: QUE_NODE_SELECT */
+ ulint state; /* node state */
+ que_node_t* select_list; /* select list */
+ sym_node_t* into_list; /* variables list or NULL */
+ sym_node_t* table_list; /* table list */
+ ibool asc; /* TRUE if the rows should be fetched
+ in an ascending order */
+ ibool set_x_locks; /* TRUE if the cursor is for update or
+ delete, which means that a row x-lock
+ should be placed on the cursor row */
+ ibool select_will_do_update;
+ /* TRUE if the select is for a searched
+ update which can be performed in-place:
+ in this case the select will take care
+ of the update */
+ ulint latch_mode; /* BTR_SEARCH_LEAF, or BTR_MODIFY_LEAF
+ if select_will_do_update is TRUE */
+ ulint row_lock_mode; /* LOCK_X or LOCK_S */
+ ulint n_tables; /* number of tables */
+ ulint fetch_table; /* number of the next table to access
+ in the join */
+ plan_t* plans; /* array of n_tables many plan nodes
+ containing the search plan and the
+ search data structures */
+ que_node_t* search_cond; /* search condition */
+ read_view_t* read_view; /* if the query is a non-locking
+ consistent read, its read view is
+ placed here, otherwise NULL */
+ ibool consistent_read;/* TRUE if the select is a consistent,
+ non-locking read */
+ order_node_t* order_by; /* order by column definition, or
+ NULL */
+ ibool is_aggregate; /* TRUE if the select list consists of
+ aggregate functions */
+ ibool aggregate_already_fetched;
+ /* TRUE if the aggregate row has
+ already been fetched for the current
+ cursor */
+ ibool can_get_updated;/* this is TRUE if the select is in a
+ single-table explicit cursor which can
+ get updated within the stored procedure,
+ or in a searched update or delete;
+				NOTE that to determine whether an
+				explicit cursor can get updated, the
+				parser checks whether the stored
+				procedure contains positioned update
+				or delete statements */
+ sym_node_t* explicit_cursor;/* not NULL if an explicit cursor */
+ UT_LIST_BASE_NODE_T(sym_node_t)
+ copy_variables; /* variables whose values we have to
+ copy when an explicit cursor is opened,
+ so that they do not change between
+ fetches */
+};
+
+/* Select node states */
+#define SEL_NODE_CLOSED 0 /* it is a declared cursor which is not
+ currently open */
+#define SEL_NODE_OPEN 1 /* intention locks not yet set on
+ tables */
+#define SEL_NODE_FETCH 2 /* intention locks have been set */
+#define SEL_NODE_NO_MORE_ROWS 3 /* cursor has reached the result set
+ end */
+
+/* Fetch statement node */
+struct fetch_node_struct{
+ que_common_t common; /* type: QUE_NODE_FETCH */
+ sel_node_t* cursor_def; /* cursor definition */
+ sym_node_t* into_list; /* variables to set */
+};
+
+/* Open or close cursor statement node */
+struct open_node_struct{
+ que_common_t common; /* type: QUE_NODE_OPEN */
+ ulint op_type; /* ROW_SEL_OPEN_CURSOR or
+ ROW_SEL_CLOSE_CURSOR */
+ sel_node_t* cursor_def; /* cursor definition */
+};
+
+/* Row printf statement node */
+struct row_printf_node_struct{
+ que_common_t common; /* type: QUE_NODE_ROW_PRINTF */
+ sel_node_t* sel_node; /* select */
+};
+
+#define ROW_SEL_OPEN_CURSOR 0
+#define ROW_SEL_CLOSE_CURSOR 1
+
+/* Flags for the MySQL interface */
+#define ROW_SEL_NEXT 1
+#define ROW_SEL_PREV 2
+
+#define ROW_SEL_EXACT 1 /* search using a complete key value */
+#define ROW_SEL_EXACT_PREFIX 2 /* search using a key prefix which
+ must match to rows: the prefix may
+ contain an incomplete field (the
+ last field in prefix may be just
+ a prefix of a fixed length column) */
+
+#ifndef UNIV_NONINL
+#include "row0sel.ic"
+#endif
+
+#endif
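The fetch protocol described in the comment on row_search_for_mysql() can be illustrated with a hedged sketch; the preparation of the prebuilt struct and of the MySQL-format record buffer is assumed to be done elsewhere (row0mysql.h), and the helper name is illustrative.

#include "row0sel.h"

/* Hedged sketch only: open a cursor on the index chosen in prebuilt and
fetch rows one by one in the MySQL format. The first call must use
direction == 0; later calls use ROW_SEL_NEXT with the stored cursor
position. */
static ulint
example_scan(
	byte*		buf,		/* in/out: MySQL-format row buffer */
	row_prebuilt_t*	prebuilt)	/* in: prepared table handle info */
{
	ulint	err;

	err = row_search_for_mysql(buf, PAGE_CUR_G, prebuilt, 0, 0);

	while (err == DB_SUCCESS) {
		/* ... use the row now stored in buf ... */

		err = row_search_for_mysql(buf, PAGE_CUR_G, prebuilt, 0,
							ROW_SEL_NEXT);
	}

	return(err);	/* DB_END_OF_INDEX when the scan is exhausted */
}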
diff --git a/innobase/include/row0sel.ic b/innobase/include/row0sel.ic
new file mode 100644
index 00000000000..9005624b6ca
--- /dev/null
+++ b/innobase/include/row0sel.ic
@@ -0,0 +1,91 @@
+/******************************************************
+Select
+
+(c) 1997 Innobase Oy
+
+Created 12/19/1997 Heikki Tuuri
+*******************************************************/
+
+#include "que0que.h"
+
+/*************************************************************************
+Gets the plan node for the nth table in a join. */
+UNIV_INLINE
+plan_t*
+sel_node_get_nth_plan(
+/*==================*/
+ /* out: plan node */
+ sel_node_t* node, /* in: select node */
+ ulint i) /* in: get ith plan node */
+{
+ ut_ad(i < node->n_tables);
+
+ return(node->plans + i);
+}
+
+/*************************************************************************
+Resets the cursor defined by sel_node to the SEL_NODE_OPEN state, which means
+that it will start fetching from the start of the result set again, regardless
+of where it was before, and it will set intention locks on the tables. */
+UNIV_INLINE
+void
+sel_node_reset_cursor(
+/*==================*/
+ sel_node_t* node) /* in: select node */
+{
+ node->state = SEL_NODE_OPEN;
+}
+
+/**************************************************************************
+Performs an execution step of an open or close cursor statement node. */
+UNIV_INLINE
+que_thr_t*
+open_step(
+/*======*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr) /* in: query thread */
+{
+ sel_node_t* sel_node;
+ open_node_t* node;
+ ulint err;
+
+ ut_ad(thr);
+
+ node = thr->run_node;
+ ut_ad(que_node_get_type(node) == QUE_NODE_OPEN);
+
+ sel_node = node->cursor_def;
+
+ err = DB_SUCCESS;
+
+ if (node->op_type == ROW_SEL_OPEN_CURSOR) {
+
+/* if (sel_node->state == SEL_NODE_CLOSED) { */
+
+ sel_node_reset_cursor(sel_node);
+/* } else {
+ err = DB_ERROR;
+ } */
+ } else {
+ if (sel_node->state != SEL_NODE_CLOSED) {
+
+ sel_node->state = SEL_NODE_CLOSED;
+ } else {
+ err = DB_ERROR;
+ }
+ }
+
+ if (err != DB_SUCCESS) {
+ /* SQL error detected */
+ printf("SQL error %lu\n", err);
+
+ ut_error;
+ que_thr_handle_error(thr, err, NULL, 0);
+
+ return(NULL);
+ }
+
+ thr->run_node = que_node_get_parent(node);
+
+ return(thr);
+}
diff --git a/innobase/include/row0types.h b/innobase/include/row0types.h
new file mode 100644
index 00000000000..79b864f4835
--- /dev/null
+++ b/innobase/include/row0types.h
@@ -0,0 +1,37 @@
+/******************************************************
+Row operation global types
+
+(c) 1996 Innobase Oy
+
+Created 12/27/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0types_h
+#define row0types_h
+
+typedef struct plan_struct plan_t;
+
+typedef struct upd_struct upd_t;
+
+typedef struct upd_field_struct upd_field_t;
+
+typedef struct upd_node_struct upd_node_t;
+
+typedef struct del_node_struct del_node_t;
+
+typedef struct ins_node_struct ins_node_t;
+
+typedef struct sel_node_struct sel_node_t;
+
+typedef struct open_node_struct open_node_t;
+
+typedef struct fetch_node_struct fetch_node_t;
+
+typedef struct row_printf_node_struct row_printf_node_t;
+typedef struct sel_buf_struct sel_buf_t;
+
+typedef struct undo_node_struct undo_node_t;
+
+typedef struct purge_node_struct purge_node_t;
+
+#endif
diff --git a/innobase/include/row0uins.h b/innobase/include/row0uins.h
new file mode 100644
index 00000000000..df5e072487e
--- /dev/null
+++ b/innobase/include/row0uins.h
@@ -0,0 +1,37 @@
+/******************************************************
+Fresh insert undo
+
+(c) 1996 Innobase Oy
+
+Created 2/25/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0uins_h
+#define row0uins_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+#include "row0types.h"
+#include "mtr0mtr.h"
+
+/***************************************************************
+Undoes a fresh insert of a row to a table. A fresh insert means that
+the same clustered index unique key did not have any record, even delete
+marked, at the time of the insert. */
+
+ulint
+row_undo_ins(
+/*=========*/
+ /* out: DB_SUCCESS */
+ undo_node_t* node, /* in: row undo node */
+ que_thr_t* thr); /* in: query thread */
+
+
+#ifndef UNIV_NONINL
+#include "row0uins.ic"
+#endif
+
+#endif
diff --git a/innobase/include/row0uins.ic b/innobase/include/row0uins.ic
new file mode 100644
index 00000000000..2b3d5a10f95
--- /dev/null
+++ b/innobase/include/row0uins.ic
@@ -0,0 +1,8 @@
+/******************************************************
+Fresh insert undo
+
+(c) 1996 Innobase Oy
+
+Created 2/25/1997 Heikki Tuuri
+*******************************************************/
+
diff --git a/innobase/include/row0umod.h b/innobase/include/row0umod.h
new file mode 100644
index 00000000000..2c8e19a80ae
--- /dev/null
+++ b/innobase/include/row0umod.h
@@ -0,0 +1,35 @@
+/******************************************************
+Undo modify of a row
+
+(c) 1997 Innobase Oy
+
+Created 2/27/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0umod_h
+#define row0umod_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+#include "row0types.h"
+#include "mtr0mtr.h"
+
+/***************************************************************
+Undoes a modify operation on a row of a table. */
+
+ulint
+row_undo_mod(
+/*=========*/
+ /* out: DB_SUCCESS or error code */
+ undo_node_t* node, /* in: row undo node */
+ que_thr_t* thr); /* in: query thread */
+
+
+#ifndef UNIV_NONINL
+#include "row0umod.ic"
+#endif
+
+#endif
diff --git a/innobase/include/row0umod.ic b/innobase/include/row0umod.ic
new file mode 100644
index 00000000000..fcbf4dbc1f3
--- /dev/null
+++ b/innobase/include/row0umod.ic
@@ -0,0 +1,7 @@
+/******************************************************
+Undo modify of a row
+
+(c) 1997 Innobase Oy
+
+Created 2/27/1997 Heikki Tuuri
+*******************************************************/
diff --git a/innobase/include/row0undo.h b/innobase/include/row0undo.h
new file mode 100644
index 00000000000..5402f1d9236
--- /dev/null
+++ b/innobase/include/row0undo.h
@@ -0,0 +1,117 @@
+/******************************************************
+Row undo
+
+(c) 1997 Innobase Oy
+
+Created 1/8/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0undo_h
+#define row0undo_h
+
+#include "univ.i"
+#include "mtr0mtr.h"
+#include "trx0sys.h"
+#include "btr0types.h"
+#include "btr0pcur.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+#include "row0types.h"
+
+/************************************************************************
+Creates a row undo node to a query graph. */
+
+undo_node_t*
+row_undo_node_create(
+/*=================*/
+ /* out, own: undo node */
+ trx_t* trx, /* in: transaction */
+ que_thr_t* parent, /* in: parent node, i.e., a thr node */
+ mem_heap_t* heap); /* in: memory heap where created */
+/***************************************************************
+Looks for the clustered index record when node has the row reference.
+The pcur in node is used in the search. If found, stores the row to node,
+and stores the position of pcur, and detaches it. The pcur must be closed
+by the caller in any case. */
+
+ibool
+row_undo_search_clust_to_pcur(
+/*==========================*/
+ /* out: TRUE if found; NOTE the node->pcur
+ must be closed by the caller, regardless of
+ the return value */
+ undo_node_t* node, /* in: row undo node */
+ que_thr_t* thr); /* in: query thread */
+/***************************************************************
+Undoes a row operation in a table. This is a high-level function used
+in SQL execution graphs. */
+
+que_thr_t*
+row_undo_step(
+/*==========*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+
+/* A single query thread will try to perform the undo for all successive
+versions of a clustered index record, if the transaction has modified it
+several times during the execution which is rolled back. It may happen
+that the task is transferred to another query thread, if the other thread
+is assigned to handle an undo log record in the chain of different versions
+of the record, and the other thread happens to get the x-latch to the
+clustered index record at the right time.
+ If a query thread notices that the clustered index record it is looking
+for is missing, or the roll ptr field in the record does not point to the
+undo log record the thread was assigned to handle, then it gives up the undo
+task for that undo log record, and fetches the next. This situation can occur
+only in the case where the transaction modified the same record several times
+and another thread is currently doing the undo for successive versions of
+that index record. */
+
+/* Undo node structure */
+
+struct undo_node_struct{
+ que_common_t common; /* node type: QUE_NODE_UNDO */
+ ulint state; /* node execution state */
+ trx_t* trx; /* trx for which undo is done */
+ dulint roll_ptr;/* roll pointer to undo log record */
+ trx_undo_rec_t* undo_rec;/* undo log record */
+ dulint undo_no;/* undo number of the record */
+ ulint rec_type;/* undo log record type: TRX_UNDO_INSERT_REC,
+ ... */
+ dulint new_roll_ptr; /* roll ptr to restore to clustered index
+ record */
+ dulint new_trx_id; /* trx id to restore to clustered index
+ record */
+ btr_pcur_t pcur; /* persistent cursor used in searching the
+ clustered index record */
+ dict_table_t* table; /* table where undo is done; NOTE that the
+ table has to be released explicitly with
+ dict_table_release */
+ ulint cmpl_info;/* compiler analysis of an update */
+ upd_t* update; /* update vector for a clustered index record */
+ dtuple_t* ref; /* row reference to the next row to handle */
+ dtuple_t* row; /* a copy (also fields copied to heap) of the
+ row to handle */
+ dict_index_t* index; /* the next index whose record should be
+ handled */
+ mem_heap_t* heap; /* memory heap used as auxiliary storage for
+ row; this must be emptied after undo is tried
+ on a row */
+};
+
+/* Execution states for an undo node */
+#define UNDO_NODE_FETCH_NEXT 1 /* we should fetch the next undo log
+ record */
+#define UNDO_NODE_PREV_VERS 2 /* the roll ptr to previous version of
+ a row is stored in node, and undo
+ should be done based on it */
+#define UNDO_NODE_INSERT 3
+#define UNDO_NODE_MODIFY 4
+
+
+#ifndef UNIV_NONINL
+#include "row0undo.ic"
+#endif
+
+#endif
diff --git a/innobase/include/row0undo.ic b/innobase/include/row0undo.ic
new file mode 100644
index 00000000000..e7f89c7de67
--- /dev/null
+++ b/innobase/include/row0undo.ic
@@ -0,0 +1,7 @@
+/******************************************************
+Row undo
+
+(c) 1997 Innobase Oy
+
+Created 1/8/1997 Heikki Tuuri
+*******************************************************/
diff --git a/innobase/include/row0upd.h b/innobase/include/row0upd.h
new file mode 100644
index 00000000000..3046345f446
--- /dev/null
+++ b/innobase/include/row0upd.h
@@ -0,0 +1,363 @@
+/******************************************************
+Update of a row
+
+(c) 1996 Innobase Oy
+
+Created 12/27/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0upd_h
+#define row0upd_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "btr0types.h"
+#include "btr0pcur.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+#include "row0types.h"
+#include "pars0types.h"
+
+/*************************************************************************
+Creates an update vector object. */
+UNIV_INLINE
+upd_t*
+upd_create(
+/*=======*/
+ /* out, own: update vector object */
+ ulint n, /* in: number of fields */
+ mem_heap_t* heap); /* in: heap from which memory allocated */
+/*************************************************************************
+Returns the number of fields in the update vector == number of columns
+to be updated by an update vector. */
+UNIV_INLINE
+ulint
+upd_get_n_fields(
+/*=============*/
+ /* out: number of fields */
+ upd_t* update); /* in: update vector */
+/*************************************************************************
+Returns the nth field of an update vector. */
+UNIV_INLINE
+upd_field_t*
+upd_get_nth_field(
+/*==============*/
+ /* out: update vector field */
+ upd_t* update, /* in: update vector */
+ ulint n); /* in: field position in update vector */
+/*************************************************************************
+Sets the clustered index field number to be updated by an update vector
+field. */
+UNIV_INLINE
+void
+upd_field_set_field_no(
+/*===================*/
+ upd_field_t* upd_field, /* in: update vector field */
+ ulint field_no, /* in: field number in a clustered
+ index */
+ dict_index_t* index); /* in: clustered index */
+/*************************************************************************
+Writes into the redo log the values of trx id and roll ptr and enough info
+to determine their positions within a clustered index record. */
+
+byte*
+row_upd_write_sys_vals_to_log(
+/*==========================*/
+ /* out: new pointer to mlog */
+ dict_index_t* index, /* in: clustered index */
+ trx_t* trx, /* in: transaction */
+ dulint roll_ptr,/* in: roll ptr of the undo log record */
+ byte* log_ptr,/* pointer to a buffer of size > 20 opened
+ in mlog */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************************
+Updates the trx id and roll ptr field in a clustered index record when
+a row is updated or marked deleted. */
+UNIV_INLINE
+void
+row_upd_rec_sys_fields(
+/*===================*/
+ rec_t* rec, /* in: record */
+ dict_index_t* index, /* in: clustered index */
+ trx_t* trx, /* in: transaction */
+ dulint roll_ptr);/* in: roll ptr of the undo log record */
+/*************************************************************************
+Sets the trx id or roll ptr field of a clustered index entry. */
+
+void
+row_upd_index_entry_sys_field(
+/*==========================*/
+ dtuple_t* entry, /* in: index entry, where the memory buffers
+ for sys fields are already allocated:
+ the function just copies the new values to
+ them */
+ dict_index_t* index, /* in: clustered index */
+ ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
+ dulint val); /* in: value to write */
+/*************************************************************************
+Creates an update node for a query graph. */
+
+upd_node_t*
+upd_node_create(
+/*============*/
+ /* out, own: update node */
+ mem_heap_t* heap); /* in: mem heap where created */
+/***************************************************************
+Writes to the redo log the new values of the fields occurring in the index. */
+
+void
+row_upd_index_write_log(
+/*====================*/
+ upd_t* update, /* in: update vector */
+ byte* log_ptr,/* in: pointer to mlog buffer: must contain at least
+ MLOG_BUF_MARGIN bytes of free space; the buffer is
+ closed within this function */
+ mtr_t* mtr); /* in: mtr into whose log to write */
+/***************************************************************
+Returns TRUE if row update changes size of some field in index. */
+
+ibool
+row_upd_changes_field_size(
+/*=======================*/
+ /* out: TRUE if the update changes the size of
+ some field in index */
+ rec_t* rec, /* in: record in clustered index */
+ dict_index_t* index, /* in: clustered index */
+ upd_t* update);/* in: update vector */
+/***************************************************************
+Replaces the new column values stored in the update vector to the record
+given. No field size changes are allowed. This function is used only for
+a clustered index. */
+
+void
+row_upd_rec_in_place(
+/*=================*/
+ rec_t* rec, /* in/out: record where replaced */
+ upd_t* update);/* in: update vector */
+/*******************************************************************
+Builds an update vector from those fields, excluding the roll ptr and
+trx id fields, which in an index entry differ from a record that has
+the equal ordering fields. */
+
+upd_t*
+row_upd_build_difference(
+/*=====================*/
+ /* out, own: update vector of differing
+ fields, excluding roll ptr and trx id */
+ dict_index_t* index, /* in: clustered index */
+ dtuple_t* entry, /* in: entry to insert */
+ rec_t* rec, /* in: clustered index record */
+ mem_heap_t* heap); /* in: memory heap from which allocated */
+/***************************************************************
+Replaces the new column values stored in the update vector to the index entry
+given. */
+
+void
+row_upd_index_replace_new_col_vals(
+/*===============================*/
+ dtuple_t* entry, /* in/out: index entry where replaced */
+ dict_index_t* index, /* in: index; NOTE that may also be a
+ non-clustered index */
+ upd_t* update); /* in: update vector */
+/***************************************************************
+Replaces the new column values stored in the update vector to the
+clustered index entry given. */
+
+void
+row_upd_clust_index_replace_new_col_vals(
+/*=====================================*/
+ dtuple_t* entry, /* in/out: index entry where replaced */
+ upd_t* update); /* in: update vector */
+/***************************************************************
+Checks if an update vector changes an ordering field of an index record.
+This function is fast if the update vector is short or the number of ordering
+fields in the index is small. Otherwise, this can be quadratic. */
+
+ibool
+row_upd_changes_ord_field(
+/*======================*/
+ /* out: TRUE if update vector changes
+ an ordering field in the index record */
+ dtuple_t* row, /* in: old value of row, or NULL if the
+ row and the data values in update are not
+ known when this function is called, e.g., at
+ compile time */
+ dict_index_t* index, /* in: index of the record */
+ upd_t* update);/* in: update vector for the row */
+/***************************************************************
+Checks if an update vector changes an ordering field of an index record.
+This function is fast if the update vector is short or the number of ordering
+fields in the index is small. Otherwise, this can be quadratic. */
+
+ibool
+row_upd_changes_some_index_ord_field(
+/*=================================*/
+ /* out: TRUE if update vector may change
+ an ordering field in an index record */
+ dict_table_t* table, /* in: table */
+ upd_t* update);/* in: update vector for the row */
+/***************************************************************
+Updates a row in a table. This is a high-level function used
+in SQL execution graphs. */
+
+que_thr_t*
+row_upd_step(
+/*=========*/
+ /* out: query thread to run next or NULL */
+ que_thr_t* thr); /* in: query thread */
+/*************************************************************************
+Performs an in-place update for the current clustered index record in
+select. */
+
+void
+row_upd_in_place_in_select(
+/*=======================*/
+ sel_node_t* sel_node, /* in: select node */
+ que_thr_t* thr, /* in: query thread */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************************
+Parses the log data of system field values. */
+
+byte*
+row_upd_parse_sys_vals(
+/*===================*/
+ /* out: log data end or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ ulint* pos, /* out: TRX_ID position in record */
+ dulint* trx_id, /* out: trx id */
+ dulint* roll_ptr);/* out: roll ptr */
+/*************************************************************************
+Updates the trx id and roll ptr field in a clustered index record in database
+recovery. */
+
+void
+row_upd_rec_sys_fields_in_recovery(
+/*===============================*/
+ rec_t* rec, /* in: record */
+ ulint pos, /* in: TRX_ID position in rec */
+ dulint trx_id, /* in: transaction id */
+ dulint roll_ptr);/* in: roll ptr of the undo log record */
+/*************************************************************************
+Parses the log data written by row_upd_index_write_log. */
+
+byte*
+row_upd_index_parse(
+/*================*/
+ /* out: log data end or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ mem_heap_t* heap, /* in: memory heap where update vector is
+ built */
+ upd_t** update_out);/* out: update vector */
+
+
+/* Update vector field */
+struct upd_field_struct{
+ ulint field_no; /* field number in the clustered
+ index */
+ que_node_t* exp; /* expression for calculating a new
+ value: it refers to column values and
+ constants in the symbol table of the
+ query graph */
+ dfield_t new_val; /* new value for the column */
+};
+
+/* Update vector structure */
+struct upd_struct{
+ ulint info_bits; /* new value of info bits to record;
+ default is 0 */
+ ulint n_fields; /* number of update fields */
+ upd_field_t* fields; /* array of update fields */
+};
+
+/* Update node structure which also implements the delete operation
+of a row */
+
+struct upd_node_struct{
+ que_common_t common; /* node type: QUE_NODE_UPDATE */
+ ibool is_delete;/* TRUE if delete, FALSE if update */
+ ibool searched_update;
+ /* TRUE if searched update, FALSE if
+ positioned */
+ ibool select_will_do_update;
+ /* TRUE if a searched update where ordering
+ fields will not be updated, and the size of
+ the fields will not change: in this case the
+ select node will take care of the update */
+ ibool in_mysql_interface;
+ /* TRUE if the update node was created
+ for the MySQL interface */
+ sel_node_t* select; /* query graph subtree implementing a base
+ table cursor: the rows returned will be
+ updated */
+ btr_pcur_t* pcur; /* persistent cursor placed on the clustered
+ index record which should be updated or
+ deleted; the cursor is stored in the graph
+ of 'select' field above, except in the case
+ of the MySQL interface */
+ dict_table_t* table; /* table where updated */
+ upd_t* update; /* update vector for the row */
+ sym_node_list_t columns;/* symbol table nodes for the columns
+ to retrieve from the table */
+ ibool has_clust_rec_x_lock;
+ /* TRUE if the select which retrieves the
+ records to update already sets an x-lock on
+ the clustered record; note that it must always
+ set at least an s-lock */
+ ulint cmpl_info;/* information extracted during query
+ compilation; speeds up execution:
+ UPD_NODE_NO_ORD_CHANGE and
+ UPD_NODE_NO_SIZE_CHANGE, ORed */
+ /*----------------------*/
+ /* Local storage for this graph node */
+ ulint state; /* node execution state */
+ dict_index_t* index; /* NULL, or the next index whose record should
+ be updated */
+ dtuple_t* row; /* NULL, or a copy (also fields copied to
+ heap) of the row to update; this must be reset
+ to NULL after a successful update */
+ mem_heap_t* heap; /* memory heap used as auxiliary storage for
+ row; this must be emptied after a successful
+ update if node->row != NULL */
+ /*----------------------*/
+ sym_node_t* table_sym;/* table node in symbol table */
+ que_node_t* col_assign_list;
+ /* column assignment list */
+ ulint magic_n;
+};
+
+#define UPD_NODE_MAGIC_N 1579975
+
+/* Node execution states */
+#define UPD_NODE_SET_IX_LOCK 1 /* execution came to the node from
+ a node above and if the field
+ has_clust_rec_x_lock is FALSE, we
+ should set an intention x-lock on
+ the table */
+#define UPD_NODE_UPDATE_CLUSTERED 2 /* clustered index record should be
+ updated */
+#define UPD_NODE_INSERT_CLUSTERED 3 /* clustered index record should be
+ inserted, old record is already delete
+ marked */
+#define UPD_NODE_UPDATE_ALL_SEC 4 /* an ordering field of the clustered
+ index record was changed, or this is
+ a delete operation: should update
+ all the secondary index records */
+#define UPD_NODE_UPDATE_SOME_SEC 5 /* secondary index entries should be
+ looked at and updated if an ordering
+ field changed */
+
+/* Compilation info flags: these must fit within one byte */
+#define UPD_NODE_NO_ORD_CHANGE 1 /* no secondary index record will be
+ changed in the update and no ordering
+ field of the clustered index */
+#define UPD_NODE_NO_SIZE_CHANGE 2 /* no record field size will be
+ changed in the update */
+
+#ifndef UNIV_NONINL
+#include "row0upd.ic"
+#endif
+
+#endif
diff --git a/innobase/include/row0upd.ic b/innobase/include/row0upd.ic
new file mode 100644
index 00000000000..b1b10bef0e8
--- /dev/null
+++ b/innobase/include/row0upd.ic
@@ -0,0 +1,105 @@
+/******************************************************
+Update of a row
+
+(c) 1996 Innobase Oy
+
+Created 12/27/1996 Heikki Tuuri
+*******************************************************/
+
+#include "mtr0log.h"
+#include "trx0trx.h"
+#include "trx0undo.h"
+#include "row0row.h"
+#include "btr0sea.h"
+
+/*************************************************************************
+Creates an update vector object. */
+UNIV_INLINE
+upd_t*
+upd_create(
+/*=======*/
+ /* out, own: update vector object */
+ ulint n, /* in: number of fields */
+ mem_heap_t* heap) /* in: heap from which memory allocated */
+{
+ upd_t* update;
+
+ update = mem_heap_alloc(heap, sizeof(upd_t));
+
+ update->info_bits = 0;
+ update->n_fields = n;
+ update->fields = mem_heap_alloc(heap, sizeof(upd_field_t) * n);
+
+ return(update);
+}
+
+/*************************************************************************
+Returns the number of fields in the update vector == number of columns
+to be updated by an update vector. */
+UNIV_INLINE
+ulint
+upd_get_n_fields(
+/*=============*/
+ /* out: number of fields */
+ upd_t* update) /* in: update vector */
+{
+ ut_ad(update);
+
+ return(update->n_fields);
+}
+
+/*************************************************************************
+Returns the nth field of an update vector. */
+UNIV_INLINE
+upd_field_t*
+upd_get_nth_field(
+/*==============*/
+ /* out: update vector field */
+ upd_t* update, /* in: update vector */
+ ulint n) /* in: field position in update vector */
+{
+ ut_ad(update);
+ ut_ad(n < update->n_fields);
+
+ return(update->fields + n);
+}
+
+/*************************************************************************
+Sets the clustered index field number to be updated by an update vector
+field. */
+UNIV_INLINE
+void
+upd_field_set_field_no(
+/*===================*/
+ upd_field_t* upd_field, /* in: update vector field */
+ ulint field_no, /* in: field number in a clustered
+ index */
+ dict_index_t* index) /* in: clustered index */
+{
+ ut_ad(index->type & DICT_CLUSTERED);
+
+ upd_field->field_no = field_no;
+
+ dtype_copy(dfield_get_type(&(upd_field->new_val)),
+ dict_index_get_nth_type(index, field_no));
+}
+
+/*************************************************************************
+Updates the trx id and roll ptr field in a clustered index record when
+a row is updated or marked deleted. */
+UNIV_INLINE
+void
+row_upd_rec_sys_fields(
+/*===================*/
+ rec_t* rec, /* in: record */
+ dict_index_t* index, /* in: clustered index */
+ trx_t* trx, /* in: transaction */
+ dulint roll_ptr)/* in: roll ptr of the undo log record */
+{
+ ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(!buf_block_align(rec)->is_hashed
+ || rw_lock_own(&btr_search_latch, RW_LOCK_EX));
+
+ row_set_rec_trx_id(rec, index, trx->id);
+ row_set_rec_roll_ptr(rec, index, roll_ptr);
+}
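A hedged sketch of how the update-vector helpers declared in row0upd.h and defined above fit together; the heap, clustered index and new value are assumed to come from the caller, and the field number 2 as well as the helper name are purely illustrative.

#include "row0upd.h"

/* Hedged sketch only: build a one-field update vector which assigns a new
value to clustered index field number 2. */
static upd_t*
example_build_update(
	mem_heap_t*	heap,		/* in: memory heap */
	dict_index_t*	clust_index,	/* in: clustered index */
	byte*		new_data,	/* in: new column value */
	ulint		new_len)	/* in: length of the new value */
{
	upd_t*		update;
	upd_field_t*	ufield;

	update = upd_create(1, heap);

	ufield = upd_get_nth_field(update, 0);
	upd_field_set_field_no(ufield, 2, clust_index);
	dfield_set_data(&(ufield->new_val), new_data, new_len);

	return(update);
}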
diff --git a/innobase/include/row0vers.h b/innobase/include/row0vers.h
new file mode 100644
index 00000000000..30cf82144e9
--- /dev/null
+++ b/innobase/include/row0vers.h
@@ -0,0 +1,95 @@
+/******************************************************
+Row versions
+
+(c) 1997 Innobase Oy
+
+Created 2/6/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef row0vers_h
+#define row0vers_h
+
+#include "univ.i"
+#include "data0data.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+#include "rem0types.h"
+#include "mtr0mtr.h"
+#include "read0types.h"
+
+/*********************************************************************
+Finds out if an active transaction has inserted or modified a secondary
+index record. NOTE: the kernel mutex is temporarily released in this
+function! */
+
+trx_t*
+row_vers_impl_x_locked_off_kernel(
+/*==============================*/
+ /* out: NULL if committed, else the active
+ transaction; NOTE that the kernel mutex is
+ temporarily released! */
+ rec_t* rec, /* in: record in a secondary index */
+ dict_index_t* index); /* in: the secondary index */
+/*********************************************************************
+Finds out if we must preserve a delete marked earlier version of a clustered
+index record, because it is >= the purge view. */
+
+ibool
+row_vers_must_preserve_del_marked(
+/*==============================*/
+ /* out: TRUE if earlier version should be preserved */
+ dulint trx_id, /* in: transaction id in the version */
+ mtr_t* mtr); /* in: mtr holding the latch on the clustered index
+ record; it will also hold the latch on purge_view */
+/*********************************************************************
+Finds out if a version of the record, where the version >= the current
+purge view, should have ientry as its secondary index entry. We check
+if there is any not delete marked version of the record where the trx
+id >= purge view, and the secondary index entry == ientry; exactly in
+this case we return TRUE. */
+
+ibool
+row_vers_old_has_index_entry(
+/*=========================*/
+ /* out: TRUE if earlier version should have */
+ ibool also_curr,/* in: TRUE if also rec is included in the
+ versions to search; otherwise only versions
+ prior to it are searched */
+ rec_t* rec, /* in: record in the clustered index; the
+ caller must have a latch on the page */
+ mtr_t* mtr, /* in: mtr holding the latch on rec; it will
+ also hold the latch on purge_view */
+ dict_index_t* index, /* in: the secondary index */
+ dtuple_t* ientry); /* in: the secondary index entry */
+/*********************************************************************
+Constructs the version of a clustered index record which a consistent
+read should see. We assume that the trx id stored in rec is such that
+the consistent read should not see rec in its present version. */
+
+ulint
+row_vers_build_for_consistent_read(
+/*===============================*/
+ /* out: DB_SUCCESS or DB_MISSING_HISTORY */
+ rec_t* rec, /* in: record in a clustered index; the
+ caller must have a latch on the page; this
+ latch locks the top of the stack of versions
+				of this record */
+ mtr_t* mtr, /* in: mtr holding the latch on rec; it will
+ also hold the latch on purge_view */
+ dict_index_t* index, /* in: the clustered index */
+ read_view_t* view, /* in: the consistent read view */
+ mem_heap_t* in_heap,/* in: memory heap from which the memory for
+ old_vers is allocated; memory for possible
+ intermediate versions is allocated and freed
+ locally within the function */
+ rec_t** old_vers);/* out, own: old version, or NULL if the
+ record does not exist in the view, that is,
+ it was freshly inserted afterwards */
+
+
+#ifndef UNIV_NONINL
+#include "row0vers.ic"
+#endif
+
+#endif
diff --git a/innobase/include/row0vers.ic b/innobase/include/row0vers.ic
new file mode 100644
index 00000000000..aa7a7aa2299
--- /dev/null
+++ b/innobase/include/row0vers.ic
@@ -0,0 +1,83 @@
+/******************************************************
+Row versions
+
+(c) 1997 Innobase Oy
+
+Created 2/6/1997 Heikki Tuuri
+*******************************************************/
+
+#include "row0row.h"
+#include "dict0dict.h"
+#include "read0read.h"
+#include "page0page.h"
+#include "log0recv.h"
+
+/*************************************************************************
+Fetches the trx id of a clustered index record or version. */
+UNIV_INLINE
+dulint
+row_vers_get_trx_id(
+/*================*/
+ /* out: trx id or ut_dulint_zero if the
+ clustered index record not found */
+ rec_t* rec, /* in: clustered index record, or an old
+ version of it */
+ dict_table_t* table) /* in: table */
+{
+ return(row_get_rec_trx_id(rec, dict_table_get_first_index(table)));
+}
+
+/*************************************************************************
+Checks if a consistent read can be performed immediately on the index
+record, or if an older version is needed. */
+UNIV_INLINE
+ibool
+row_vers_clust_rec_sees_older(
+/*==========================*/
+ /* out: FALSE if can read immediately */
+ rec_t* rec, /* in: record which should be read or passed
+ over by a read cursor */
+ dict_index_t* index, /* in: clustered index */
+ read_view_t* view) /* in: read view */
+{
+ ut_ad(index->type & DICT_CLUSTERED);
+
+ if (read_view_sees_trx_id(view, row_get_rec_trx_id(rec, index))) {
+
+ return(FALSE);
+ }
+
+ return(TRUE);
+}
+
+/*************************************************************************
+Checks if a secondary index record can be read immediately by a consistent
+read, or if an older version may be needed. To be sure, we will have to
+look in the clustered index. */
+UNIV_INLINE
+ibool
+row_vers_sec_rec_may_see_older(
+/*===========================*/
+ /* out: FALSE if can be read immediately */
+ rec_t* rec, /* in: record which should be read or passed */
+ dict_index_t* index, /* in: secondary index */
+ read_view_t* view) /* in: read view */
+{
+ page_t* page;
+
+ ut_ad(!(index->type & DICT_CLUSTERED));
+
+ page = buf_frame_align(rec);
+
+ if ((ut_dulint_cmp(page_get_max_trx_id(page), view->up_limit_id) >= 0)
+ || recv_recovery_is_on()) {
+
+ /* It may be that the record was inserted or modified by a
+ transaction the view should not see: we have to look in the
+ clustered index */
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
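To show how the version functions above are intended to be combined for a consistent read, here is a hedged sketch; the read view, heap, mtr and page latch are assumed to be set up by the caller, and the helper name is illustrative.

#include "row0vers.h"

/* Hedged sketch only: return in *version the version of a clustered index
record which the given consistent read view should see, rebuilding an old
version from the undo log only when the current record is too new. */
static ulint
example_read_version(
	rec_t*		rec,		/* in: clustered index record,
					page latched by the caller */
	dict_index_t*	clust_index,	/* in: clustered index */
	read_view_t*	view,		/* in: consistent read view */
	mem_heap_t*	heap,		/* in: heap for the old version */
	mtr_t*		mtr,		/* in: mtr holding the latch on rec */
	rec_t**		version)	/* out: version to be seen, or NULL */
{
	if (!row_vers_clust_rec_sees_older(rec, clust_index, view)) {
		/* The record in its present version is visible */
		*version = rec;

		return(DB_SUCCESS);
	}

	return(row_vers_build_for_consistent_read(rec, mtr, clust_index,
						view, heap, version));
}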
diff --git a/innobase/include/srv0que.h b/innobase/include/srv0que.h
new file mode 100644
index 00000000000..05c339cdd32
--- /dev/null
+++ b/innobase/include/srv0que.h
@@ -0,0 +1,53 @@
+/******************************************************
+Server query execution
+
+(c) 1996 Innobase Oy
+
+Created 6/5/1996 Heikki Tuuri
+*******************************************************/
+
+
+#ifndef srv0que_h
+#define srv0que_h
+
+#include "univ.i"
+#include "que0types.h"
+
+/**************************************************************************
+Checks if there is work to do in the server task queue. If there is, the
+thread starts processing a task. Before leaving, it again checks the task
+queue and picks a new task if any exists. This is called by a SRV_WORKER
+thread. */
+
+void
+srv_que_task_queue_check(void);
+/*==========================*/
+/**************************************************************************
+Performs round-robin on the server tasks. This is called by a SRV_WORKER
+thread every second or so. */
+
+que_thr_t*
+srv_que_round_robin(
+/*================*/
+ /* out: the new (may be == thr) query thread
+ to run */
+ que_thr_t* thr); /* in: query thread */
+/**************************************************************************
+Enqueues a task to server task queue and releases a worker thread, if
+there exists one suspended. */
+
+void
+srv_que_task_enqueue(
+/*=================*/
+ que_thr_t* thr); /* in: query thread */
+/**************************************************************************
+Enqueues a task to server task queue and releases a worker thread, if
+there exists one suspended. */
+
+void
+srv_que_task_enqueue_low(
+/*=====================*/
+ que_thr_t* thr); /* in: query thread */
+
+#endif
+
diff --git a/innobase/include/srv0srv.h b/innobase/include/srv0srv.h
new file mode 100644
index 00000000000..6418b903eeb
--- /dev/null
+++ b/innobase/include/srv0srv.h
@@ -0,0 +1,237 @@
+/******************************************************
+The server main program
+
+(c) 1995 Innobase Oy
+
+Created 10/10/1995 Heikki Tuuri
+*******************************************************/
+
+
+#ifndef srv0srv_h
+#define srv0srv_h
+
+#include "univ.i"
+#include "sync0sync.h"
+#include "os0sync.h"
+#include "com0com.h"
+#include "que0types.h"
+
+/* Server parameters which are read from the initfile */
+
+extern char* srv_data_home;
+extern char* srv_logs_home;
+extern char* srv_arch_dir;
+
+extern ulint srv_n_data_files;
+extern char** srv_data_file_names;
+extern ulint* srv_data_file_sizes;
+
+extern char** srv_log_group_home_dirs;
+
+extern ulint srv_n_log_groups;
+extern ulint srv_n_log_files;
+extern ulint srv_log_file_size;
+extern ibool srv_log_archive_on;
+extern ulint srv_log_buffer_size;
+extern ibool srv_flush_log_at_trx_commit;
+
+extern ibool srv_use_native_aio;
+
+extern ulint srv_pool_size;
+extern ulint srv_mem_pool_size;
+extern ulint srv_lock_table_size;
+
+extern ulint srv_n_file_io_threads;
+
+extern ibool srv_archive_recovery;
+extern dulint srv_archive_recovery_limit_lsn;
+
+extern ulint srv_lock_wait_timeout;
+
+/*-------------------------------------------*/
+extern ulint srv_n_spin_wait_rounds;
+extern ulint srv_spin_wait_delay;
+extern ibool srv_priority_boost;
+
+extern ulint srv_pool_size;
+extern ulint srv_mem_pool_size;
+extern ulint srv_lock_table_size;
+
+extern ulint srv_sim_disk_wait_pct;
+extern ulint srv_sim_disk_wait_len;
+extern ibool srv_sim_disk_wait_by_yield;
+extern ibool srv_sim_disk_wait_by_wait;
+
+extern ibool srv_measure_contention;
+extern ibool srv_measure_by_spin;
+
+extern ibool srv_print_thread_releases;
+extern ibool srv_print_lock_waits;
+extern ibool srv_print_buf_io;
+extern ibool srv_print_log_io;
+extern ibool srv_print_parsed_sql;
+extern ibool srv_print_latch_waits;
+
+extern ibool srv_test_nocache;
+extern ibool srv_test_cache_evict;
+
+extern ibool srv_test_extra_mutexes;
+extern ibool srv_test_sync;
+extern ulint srv_test_n_threads;
+extern ulint srv_test_n_loops;
+extern ulint srv_test_n_free_rnds;
+extern ulint srv_test_n_reserved_rnds;
+extern ulint srv_test_n_mutexes;
+extern ulint srv_test_array_size;
+
+extern ulint srv_activity_count;
+
+extern mutex_t* kernel_mutex_temp;/* mutex protecting the server, trx structs,
+ query threads, and lock table: we allocate
+ it from dynamic memory to get it to the
+ same DRAM page as other hotspot semaphores */
+#define kernel_mutex (*kernel_mutex_temp)
+
+typedef struct srv_sys_struct srv_sys_t;
+
+/* The server system */
+extern srv_sys_t* srv_sys;
+
+/*************************************************************************
+Boots Innobase server. */
+
+ulint
+srv_boot(void);
+/*==========*/
+ /* out: DB_SUCCESS or error code */
+/*************************************************************************
+Gets the number of threads in the system. */
+
+ulint
+srv_get_n_threads(void);
+/*===================*/
+/*************************************************************************
+Returns the calling thread type. */
+
+ulint
+srv_get_thread_type(void);
+/*=====================*/
+ /* out: SRV_COM, ... */
+/*************************************************************************
+Releases threads of the type given from suspension in the thread table.
+NOTE! The server mutex has to be reserved by the caller! */
+
+ulint
+srv_release_threads(
+/*================*/
+ /* out: number of threads released: this may be
+ < n if not enough threads were suspended at the
+ moment */
+ ulint type, /* in: thread type */
+ ulint n); /* in: number of threads to release */
+/*************************************************************************
+The master thread controlling the server. */
+
+ulint
+srv_master_thread(
+/*==============*/
+ /* out: a dummy parameter */
+ void* arg); /* in: a dummy parameter required by
+ os_thread_create */
+/*************************************************************************
+Reads a keyword and a value from a file. */
+
+ulint
+srv_read_init_val(
+/*==============*/
+ /* out: DB_SUCCESS or error code */
+ FILE* initfile, /* in: file pointer */
+ char* keyword, /* in: keyword before value(s), or NULL if
+ no keyword read */
+ char* str_buf, /* in/out: buffer for a string value to read,
+ buffer size must be 10000 bytes, if NULL
+ then not read */
+ ulint* num_val, /* out: numerical value to read, if NULL
+ then not read */
+ ibool print_not_err); /* in: if TRUE, then we will not print
+ error messages to console */
+/***********************************************************************
+Tells the Innobase server that there has been activity in the database
+and wakes up the master thread if it is suspended (not sleeping). Used
+in the MySQL interface. Note that there is a small chance that the master
+thread stays suspended (we do not protect our operation with the kernel
+mutex, for performance reasons). */
+
+void
+srv_active_wake_master_thread(void);
+/*===============================*/
+/*******************************************************************
+Puts a MySQL OS thread to wait for a lock to be released. */
+
+ibool
+srv_suspend_mysql_thread(
+/*=====================*/
+ /* out: TRUE if the lock wait timeout was
+ exceeded */
+ que_thr_t* thr); /* in: query thread associated with
+ the MySQL OS thread */
+/************************************************************************
+Releases a MySQL OS thread waiting for a lock to be released, if the
+thread is already suspended. */
+
+void
+srv_release_mysql_thread_if_suspended(
+/*==================================*/
+ que_thr_t* thr); /* in: query thread associated with the
+ MySQL OS thread */
+/*************************************************************************
+A thread which wakes up threads whose lock wait may have lasted too long. */
+
+ulint
+srv_lock_timeout_monitor_thread(
+/*============================*/
+ /* out: a dummy parameter */
+ void* arg); /* in: a dummy parameter required by
+ os_thread_create */
+
+
+/* Types for the threads existing in the system. Threads of types 4 - 9
+are called utility threads. Note that utility threads are mainly disk
+bound, except that version threads 6 - 7 may also be CPU bound, if
+cleaning versions from the buffer pool. */
+
+#define SRV_COM 1 /* threads serving communication and queries */
+#define SRV_CONSOLE 2 /* thread serving console */
+#define SRV_WORKER 3 /* threads serving parallelized queries and
+ queries released from lock wait */
+#define SRV_BUFFER 4 /* thread flushing dirty buffer blocks,
+ not currently in use */
+#define SRV_RECOVERY 5 /* threads finishing a recovery,
+ not currently in use */
+#define SRV_INSERT 6 /* thread flushing the insert buffer to disk,
+ not currently in use */
+#define SRV_MASTER	7	/* the master thread (whose type number must
+ be biggest) */
+
+/* Thread slot in the thread table */
+typedef struct srv_slot_struct srv_slot_t;
+
+/* Thread table is an array of slots */
+typedef srv_slot_t srv_table_t;
+
+/* The server system struct */
+struct srv_sys_struct{
+ os_event_t operational; /* created threads must wait for the
+ server to become operational by
+ waiting for this event */
+ com_endpoint_t* endpoint; /* the communication endpoint of the
+ server */
+
+ srv_table_t* threads; /* server thread table */
+ UT_LIST_BASE_NODE_T(que_thr_t)
+ tasks; /* task queue */
+};
+
+extern ulint srv_n_threads_active[];
+
+#endif
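
The wake-up hooks declared above are what the MySQL interface is expected to call around ordinary query activity. A minimal sketch, under the assumption that DB_SUCCESS is the success code declared in db0err.h and that real error handling is done by the caller:

	ulint	err;

	err = srv_boot();		/* boot the Innobase server subsystem */

	if (err != DB_SUCCESS) {
		/* handle the error; the possible codes are listed in db0err.h */
	}

	/* ... a user operation modifies the database ... */

	srv_active_wake_master_thread();	/* nudge the master thread */
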
diff --git a/innobase/include/srv0srv.ic b/innobase/include/srv0srv.ic
new file mode 100644
index 00000000000..73e0729660f
--- /dev/null
+++ b/innobase/include/srv0srv.ic
@@ -0,0 +1,7 @@
+/******************************************************
+Server main program
+
+(c) 1995 Innobase Oy
+
+Created 10/4/1995 Heikki Tuuri
+*******************************************************/
diff --git a/innobase/include/srv0start.h b/innobase/include/srv0start.h
new file mode 100644
index 00000000000..66eeb4f2e3c
--- /dev/null
+++ b/innobase/include/srv0start.h
@@ -0,0 +1,31 @@
+/******************************************************
+Starts the Innobase database server
+
+(c) 1995-2000 Innobase Oy
+
+Created 10/10/1995 Heikki Tuuri
+*******************************************************/
+
+
+#ifndef srv0start_h
+#define srv0start_h
+
+#include "univ.i"
+
+/********************************************************************
+Starts Innobase and creates a new database if database files
+are not found and the user wants them created. Server parameters are
+read from a file named "srv_init" in the ib_home directory. */
+
+int
+innobase_start_or_create_for_mysql(void);
+/*====================================*/
+ /* out: DB_SUCCESS or error code */
+/********************************************************************
+Shuts down the Innobase database. */
+
+int
+innobase_shutdown_for_mysql(void);
+/*=============================*/
+ /* out: DB_SUCCESS or error code */
+#endif
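
A sketch of the intended call sequence from the MySQL side, based only on the two declarations above; the surrounding handler glue is omitted:

	int	err;

	err = innobase_start_or_create_for_mysql();

	if (err != DB_SUCCESS) {
		return(1);		/* startup failed */
	}

	/* ... the server runs queries ... */

	err = innobase_shutdown_for_mysql();
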
diff --git a/innobase/include/sync0arr.h b/innobase/include/sync0arr.h
new file mode 100644
index 00000000000..75d79f4c93f
--- /dev/null
+++ b/innobase/include/sync0arr.h
@@ -0,0 +1,114 @@
+/******************************************************
+The wait array used in synchronization primitives
+
+(c) 1995 Innobase Oy
+
+Created 9/5/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef sync0arr_h
+#define sync0arr_h
+
+#include "univ.i"
+#include "ut0lst.h"
+#include "ut0mem.h"
+#include "os0thread.h"
+
+typedef struct sync_cell_struct sync_cell_t;
+typedef struct sync_array_struct sync_array_t;
+
+#define SYNC_ARRAY_OS_MUTEX 1
+#define SYNC_ARRAY_MUTEX 2
+
+/***********************************************************************
+Creates a synchronization wait array. It is protected by a mutex
+which is automatically reserved when the functions operating on it
+are called. */
+
+sync_array_t*
+sync_array_create(
+/*==============*/
+ /* out, own: created wait array */
+ ulint n_cells, /* in: number of cells in the array
+ to create */
+ ulint protection); /* in: either SYNC_ARRAY_OS_MUTEX or
+ SYNC_ARRAY_MUTEX: determines the type
+ of mutex protecting the data structure */
+/**********************************************************************
+Frees the resources in a wait array. */
+
+void
+sync_array_free(
+/*============*/
+ sync_array_t* arr); /* in, own: sync wait array */
+/**********************************************************************
+Reserves a wait array cell for waiting for an object.
+The event of the cell is reset to nonsignalled state. */
+
+void
+sync_array_reserve_cell(
+/*====================*/
+ sync_array_t* arr, /* in: wait array */
+ void* object, /* in: pointer to the object to wait for */
+ ulint type, /* in: lock request type */
+ #ifdef UNIV_SYNC_DEBUG
+ char* file, /* in: in debug version file where
+ requested */
+ ulint line, /* in: in the debug version line where
+ requested */
+ #endif
+ ulint* index); /* out: index of the reserved cell */
+/**********************************************************************
+This function should be called when a thread starts to wait on
+a wait array cell. In the debug version this function checks
+if the wait for a semaphore will result in a deadlock, in which
+case it prints info and asserts. */
+
+void
+sync_array_wait_event(
+/*==================*/
+ sync_array_t* arr, /* in: wait array */
+ ulint index); /* in: index of the reserved cell */
+/**********************************************************************
+Frees the cell. NOTE! sync_array_wait_event frees the cell
+automatically! */
+
+void
+sync_array_free_cell(
+/*=================*/
+ sync_array_t* arr, /* in: wait array */
+ ulint index); /* in: index of the cell in array */
+/**************************************************************************
+Looks for the cells in the wait array which refer to the wait object
+specified, and sets their corresponding events to the signaled state. In
+this way the threads waiting for the object are released to contend for
+the object. It is possible that no such cell is found, in which case
+nothing is done. */
+
+void
+sync_array_signal_object(
+/*=====================*/
+ sync_array_t* arr, /* in: wait array */
+ void* object);/* in: wait object */
+/************************************************************************
+Validates the integrity of the wait array. Checks
+that the number of reserved cells equals the count variable. */
+
+void
+sync_array_validate(
+/*================*/
+ sync_array_t* arr); /* in: sync wait array */
+/**************************************************************************
+Prints info of the wait array. */
+
+void
+sync_array_print_info(
+/*==================*/
+ sync_array_t* arr); /* in: wait array */
+
+
+#ifndef UNIV_NONINL
+#include "sync0arr.ic"
+#endif
+
+#endif
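
Together these functions form a reserve/wait/signal cycle. The sketch below shows one plausible use, assuming UNIV_SYNC_DEBUG is not defined (so the non-debug signature of sync_array_reserve_cell applies), that lock is a pointer to the object being waited for, and that RW_LOCK_EX stands in for the request type; sync_primary_wait_array is the global wait array referred to elsewhere in the sync code:

	ulint	index;

	/* Waiting side: reserve a cell and suspend on its event;
	sync_array_wait_event frees the cell when the wait ends */

	sync_array_reserve_cell(sync_primary_wait_array, lock,
						RW_LOCK_EX, &index);
	sync_array_wait_event(sync_primary_wait_array, index);

	/* Releasing side: wake up every thread waiting for the object */

	sync_array_signal_object(sync_primary_wait_array, lock);
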
diff --git a/innobase/include/sync0arr.ic b/innobase/include/sync0arr.ic
new file mode 100644
index 00000000000..dbe35c033e5
--- /dev/null
+++ b/innobase/include/sync0arr.ic
@@ -0,0 +1,10 @@
+/******************************************************
+The wait array for synchronization primitives
+
+Inline code
+
+(c) 1995 Innobase Oy
+
+Created 9/5/1995 Heikki Tuuri
+*******************************************************/
+
diff --git a/innobase/include/sync0ipm.h b/innobase/include/sync0ipm.h
new file mode 100644
index 00000000000..3244a6d26de
--- /dev/null
+++ b/innobase/include/sync0ipm.h
@@ -0,0 +1,113 @@
+/******************************************************
+A fast mutex for interprocess synchronization.
+mutex_t can be used only within a single process,
+but an ip mutex can also be used between processes.
+
+(c) 1995 Innobase Oy
+
+Created 9/30/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef sync0ipm_h
+#define sync0ipm_h
+
+#include "univ.i"
+#include "os0sync.h"
+#include "sync0sync.h"
+
+typedef struct ip_mutex_hdl_struct ip_mutex_hdl_t;
+typedef struct ip_mutex_struct ip_mutex_t;
+
+/* NOTE! The structure appears here only for the compiler to
+know its size. Do not use its fields directly!
+The structure used in a fast implementation of
+an interprocess mutex. */
+
+struct ip_mutex_struct {
+ mutex_t mutex; /* Ordinary mutex struct */
+ ulint waiters; /* This field is set to 1 if
+ there may be waiters */
+};
+
+/* The performance of the ip mutex in NT depends on how often
+a thread has to suspend itself waiting for the ip mutex
+to become free. The following variable counts system calls
+involved. */
+
+extern ulint ip_mutex_system_call_count;
+
+/**********************************************************************
+Creates, or rather, initializes
+an ip mutex object in a specified shared memory location (which must be
+appropriately aligned). The ip mutex is initialized in the reset state.
+NOTE! Explicit destroying of the ip mutex with ip_mutex_free
+is not recommended
+as the mutex resides in shared memory and we cannot make sure that
+no process is currently accessing it. Therefore just use
+ip_mutex_close to free the operating system event and mutex. */
+
+ulint
+ip_mutex_create(
+/*============*/
+ /* out: 0 if succeed */
+ ip_mutex_t* ip_mutex, /* in: pointer to shared memory */
+ char* name, /* in: name of the ip mutex */
+ ip_mutex_hdl_t** handle); /* out, own: handle to the
+ created mutex; handle exists
+ in the private address space of
+ the calling process */
+/**********************************************************************
+NOTE! Using this function is not recommended. See the note
+on ip_mutex_create. Destroys an ip mutex */
+
+void
+ip_mutex_free(
+/*==========*/
+ ip_mutex_hdl_t* handle); /* in, own: ip mutex handle */
+/**********************************************************************
+Opens an ip mutex object in a specified shared memory location.
+Explicit closing of the ip mutex with ip_mutex_close is necessary to
+free the operating system event and mutex created, and the handle. */
+
+ulint
+ip_mutex_open(
+/*==========*/
+ /* out: 0 if succeed */
+ ip_mutex_t* ip_mutex, /* in: pointer to shared memory */
+ char* name, /* in: name of the ip mutex */
+ ip_mutex_hdl_t** handle); /* out, own: handle to the
+ opened mutex */
+/**********************************************************************
+Closes an ip mutex. */
+
+void
+ip_mutex_close(
+/*===========*/
+ ip_mutex_hdl_t* handle); /* in, own: ip mutex handle */
+/******************************************************************
+Reserves an ip mutex. */
+UNIV_INLINE
+ulint
+ip_mutex_enter(
+/*===========*/
+ /* out: 0 if success,
+ SYNC_TIME_EXCEEDED if timeout */
+ ip_mutex_hdl_t* ip_mutex_hdl, /* in: pointer to ip mutex handle */
+ ulint time); /* in: maximum time to wait, in
+ microseconds, or
+ SYNC_INFINITE_TIME */
+/******************************************************************
+Releases an ip mutex. */
+UNIV_INLINE
+void
+ip_mutex_exit(
+/*==========*/
+ ip_mutex_hdl_t* ip_mutex_hdl); /* in: pointer to ip mutex handle */
+
+
+
+#ifndef UNIV_NONINL
+#include "sync0ipm.ic"
+#endif
+
+#endif
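
A minimal usage sketch of the API above; the shared memory location, the mutex name, and the error handling are placeholders. The creating process uses ip_mutex_create, while other processes would use ip_mutex_open on the same shared memory:

	ip_mutex_t*	ip_mutex;	/* points into suitably aligned shared memory */
	ip_mutex_hdl_t*	handle;

	if (ip_mutex_create(ip_mutex, "IB_IPMUTEX", &handle) != 0) {
		/* creation failed */
	}

	if (ip_mutex_enter(handle, SYNC_INFINITE_TIME) == 0) {
		/* ... critical section shared between processes ... */
		ip_mutex_exit(handle);
	}

	ip_mutex_close(handle);	/* frees the OS event and mutex, and the handle */
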
diff --git a/innobase/include/sync0ipm.ic b/innobase/include/sync0ipm.ic
new file mode 100644
index 00000000000..8487830e1dd
--- /dev/null
+++ b/innobase/include/sync0ipm.ic
@@ -0,0 +1,182 @@
+/******************************************************
+A fast mutex for interprocess synchronization.
+mutex_t can be used only within a single process,
+but ip_mutex_t can also be used between processes.
+
+(c) 1995 Innobase Oy
+
+Created 9/30/1995 Heikki Tuuri
+*******************************************************/
+
+/* An extra structure created in the private address space of each process
+which creates or opens the ip mutex. */
+
+struct ip_mutex_hdl_struct {
+ ip_mutex_t* ip_mutex; /* pointer to ip mutex */
+ os_event_t released; /* event which signals that the mutex
+ is released; this is obtained from
+ create or open of an ip mutex */
+ os_mutex_t exclude; /* os mutex obtained when ip mutex is
+ created or opened */
+};
+
+
+UNIV_INLINE
+ulint
+ip_mutex_get_waiters(
+volatile ip_mutex_t* ipm);
+UNIV_INLINE
+void
+ip_mutex_set_waiters(
+volatile ip_mutex_t* ipm,
+ ulint flag);
+UNIV_INLINE
+mutex_t*
+ip_mutex_get_mutex(
+ ip_mutex_t* ipm);
+
+
+/******************************************************************
+Accessor functions for ip mutex. */
+UNIV_INLINE
+ulint
+ip_mutex_get_waiters(
+volatile ip_mutex_t* ipm)
+{
+ return(ipm->waiters);
+}
+UNIV_INLINE
+void
+ip_mutex_set_waiters(
+volatile ip_mutex_t* ipm,
+ ulint flag)
+{
+ ipm->waiters = flag;
+}
+UNIV_INLINE
+mutex_t*
+ip_mutex_get_mutex(
+ ip_mutex_t* ipm)
+{
+ return(&(ipm->mutex));
+}
+
+/******************************************************************
+Reserves an ip mutex. */
+UNIV_INLINE
+ulint
+ip_mutex_enter(
+/*===========*/
+ /* out: 0 if success,
+ SYNC_TIME_EXCEEDED if timeout */
+ ip_mutex_hdl_t* ip_mutex_hdl, /* in: pointer to ip mutex handle */
+ ulint time) /* in: maximum time to wait, in
+ microseconds, or
+ SYNC_INFINITE_TIME */
+{
+ mutex_t* mutex;
+ os_event_t released;
+ os_mutex_t exclude;
+ ip_mutex_t* ip_mutex;
+ ulint loop_count;
+ ulint ret;
+
+ ip_mutex = ip_mutex_hdl->ip_mutex;
+ released = ip_mutex_hdl->released;
+ exclude = ip_mutex_hdl->exclude;
+
+ mutex = ip_mutex_get_mutex(ip_mutex);
+
+ loop_count = 0;
+loop:
+ loop_count++;
+ ut_ad(loop_count < 15);
+
+ if (mutex_enter_nowait(mutex) == 0) {
+ /* Succeeded! */
+
+ return(0);
+ }
+
+ ip_mutex_system_call_count++;
+
+ os_event_reset(released);
+
+ /* Order is important here: FIRST reset event, then set waiters */
+ ip_mutex_set_waiters(ip_mutex, 1);
+
+ if (mutex_enter_nowait(mutex) == 0) {
+ /* Succeeded! */
+
+ return(0);
+ }
+
+ if (time == SYNC_INFINITE_TIME) {
+ time = OS_SYNC_INFINITE_TIME;
+ }
+
+ /* Suspend to wait for release */
+
+ ip_mutex_system_call_count++;
+
+ ret = os_event_wait_time(released, time);
+
+ ip_mutex_system_call_count++;
+
+ os_mutex_enter(exclude);
+ ip_mutex_system_call_count++;
+ os_mutex_exit(exclude);
+
+ if (ret != 0) {
+ ut_a(ret == OS_SYNC_TIME_EXCEEDED);
+
+ return(SYNC_TIME_EXCEEDED);
+ }
+
+ goto loop;
+}
+
+/******************************************************************
+Releases an ip mutex. */
+UNIV_INLINE
+void
+ip_mutex_exit(
+/*==========*/
+ ip_mutex_hdl_t* ip_mutex_hdl) /* in: pointer to ip mutex handle */
+{
+ mutex_t* mutex;
+ os_event_t released;
+ os_mutex_t exclude;
+ ip_mutex_t* ip_mutex;
+
+ ip_mutex = ip_mutex_hdl->ip_mutex;
+ released = ip_mutex_hdl->released;
+ exclude = ip_mutex_hdl->exclude;
+
+ mutex = ip_mutex_get_mutex(ip_mutex);
+
+ mutex_exit(mutex);
+
+ if (ip_mutex_get_waiters(ip_mutex) != 0) {
+
+ ip_mutex_set_waiters(ip_mutex, 0);
+
+ /* Order is important here: FIRST reset waiters,
+ then set event */
+
+ ip_mutex_system_call_count++;
+ os_mutex_enter(exclude);
+
+ /* The use of the exclude mutex seems to prevent some
+ kind of a convoy problem in the test tsproc.c. We do
+ not know why. */
+
+ ip_mutex_system_call_count++;
+
+ os_event_set(released);
+
+ ip_mutex_system_call_count++;
+
+ os_mutex_exit(exclude);
+ }
+}
diff --git a/innobase/include/sync0rw.h b/innobase/include/sync0rw.h
new file mode 100644
index 00000000000..20afdfb025f
--- /dev/null
+++ b/innobase/include/sync0rw.h
@@ -0,0 +1,493 @@
+/******************************************************
+The read-write lock (for threads, not for database transactions)
+
+(c) 1995 Innobase Oy
+
+Created 9/11/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef sync0rw_h
+#define sync0rw_h
+
+#include "univ.i"
+#include "ut0lst.h"
+#include "sync0sync.h"
+#include "os0sync.h"
+
+/* The following undef is to prevent a name conflict with a macro
+in MySQL: */
+#undef rw_lock_t
+
+/* Latch types; these are used also in btr0btr.h: keep the numerical values
+smaller than 30 and the order of the numerical values like below! */
+#define RW_S_LATCH 1
+#define RW_X_LATCH 2
+#define RW_NO_LATCH 3
+
+typedef struct rw_lock_struct rw_lock_t;
+typedef struct rw_lock_debug_struct rw_lock_debug_t;
+
+typedef UT_LIST_BASE_NODE_T(rw_lock_t) rw_lock_list_t;
+
+extern rw_lock_list_t rw_lock_list;
+extern mutex_t rw_lock_list_mutex;
+
+/* The global mutex which protects debug info lists of all rw-locks.
+To modify the debug info list of an rw-lock, this mutex has to be
+acquired in addition to the mutex protecting the lock. */
+extern mutex_t rw_lock_debug_mutex;
+extern os_event_t rw_lock_debug_event; /* If deadlock detection does
+ not get immediately the mutex it
+ may wait for this event */
+extern ibool rw_lock_debug_waiters; /* This is set to TRUE, if
+ there may be waiters for the event */
+
+extern ulint rw_s_system_call_count;
+extern ulint rw_s_spin_wait_count;
+extern ulint rw_s_exit_count;
+
+extern ulint rw_x_system_call_count;
+extern ulint rw_x_spin_wait_count;
+extern ulint rw_x_exit_count;
+
+/**********************************************************************
+Creates, or rather, initializes an rw-lock object in a specified memory
+location (which must be appropriately aligned). The rw-lock is initialized
+to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
+is necessary only if the memory block containing it is freed. */
+
+#define rw_lock_create(L) rw_lock_create_func((L), __FILE__, __LINE__)
+/*=====================*/
+/**********************************************************************
+Creates, or rather, initializes an rw-lock object in a specified memory
+location (which must be appropriately aligned). The rw-lock is initialized
+to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
+is necessary only if the memory block containing it is freed. */
+
+void
+rw_lock_create_func(
+/*================*/
+ rw_lock_t* lock, /* in: pointer to memory */
+ char* cfile_name, /* in: file name where created */
+ ulint cline); /* in: file line where created */
+/**********************************************************************
+Calling this function is obligatory only if the memory buffer containing
+the rw-lock is freed. Removes an rw-lock object from the global list. The
+rw-lock is checked to be in the non-locked state. */
+
+void
+rw_lock_free(
+/*=========*/
+ rw_lock_t* lock); /* in: rw-lock */
+/**********************************************************************
+Checks that the rw-lock has been initialized and that there are no
+simultaneous shared and exclusive locks. */
+
+ibool
+rw_lock_validate(
+/*=============*/
+ rw_lock_t* lock);
+/******************************************************************
+NOTE! The following macros should be used in rw s-locking, not the
+corresponding function. */
+
+#ifdef UNIV_SYNC_DEBUG
+#define rw_lock_s_lock(M) rw_lock_s_lock_func(\
+ (M), 0, __FILE__, __LINE__)
+#else
+#define rw_lock_s_lock(M) rw_lock_s_lock_func(M)
+#endif
+/******************************************************************
+NOTE! The following macros should be used in rw s-locking, not the
+corresponding function. */
+
+#ifdef UNIV_SYNC_DEBUG
+#define rw_lock_s_lock_gen(M, P) rw_lock_s_lock_func(\
+ (M), (P), __FILE__, __LINE__)
+#else
+#define rw_lock_s_lock_gen(M, P) rw_lock_s_lock_func(M)
+#endif
+/******************************************************************
+NOTE! The following macros should be used in rw s-locking, not the
+corresponding function. */
+
+#ifdef UNIV_SYNC_DEBUG
+#define rw_lock_s_lock_nowait(M) rw_lock_s_lock_func_nowait(\
+ (M), __FILE__, __LINE__)
+#else
+#define rw_lock_s_lock_nowait(M) rw_lock_s_lock_func_nowait(M)
+#endif
+/**********************************************************************
+NOTE! Use the corresponding macro, not this function directly, except if
+you supply the file name and line number. Lock an rw-lock in shared mode
+for the current thread. If the rw-lock is locked in exclusive mode, or
+there is an exclusive lock request waiting, the function spins a preset
+time (controlled by SYNC_SPIN_ROUNDS), waiting for the lock, before
+suspending the thread. */
+UNIV_INLINE
+void
+rw_lock_s_lock_func(
+/*================*/
+ rw_lock_t* lock /* in: pointer to rw-lock */
+ #ifdef UNIV_SYNC_DEBUG
+ ,ulint pass, /* in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ char* file_name, /* in: file name where lock requested */
+ ulint line /* in: line where requested */
+ #endif
+);
+/**********************************************************************
+NOTE! Use the corresponding macro, not this function directly, except if
+you supply the file name and line number. Lock an rw-lock in shared mode
+for the current thread if the lock can be acquired immediately. */
+UNIV_INLINE
+ibool
+rw_lock_s_lock_func_nowait(
+/*=======================*/
+ /* out: TRUE if success */
+ rw_lock_t* lock /* in: pointer to rw-lock */
+ #ifdef UNIV_SYNC_DEBUG
+ ,char* file_name, /* in: file name where lock requested */
+ ulint line /* in: line where requested */
+ #endif
+);
+/**********************************************************************
+NOTE! Use the corresponding macro, not this function directly! Lock an
+rw-lock in exclusive mode for the current thread if the lock can be
+obtained immediately. */
+UNIV_INLINE
+ibool
+rw_lock_x_lock_func_nowait(
+/*=======================*/
+ /* out: TRUE if success */
+ rw_lock_t* lock /* in: pointer to rw-lock */
+ #ifdef UNIV_SYNC_DEBUG
+ ,char* file_name, /* in: file name where lock requested */
+ ulint line /* in: line where requested */
+ #endif
+);
+/**********************************************************************
+Releases a shared mode lock. */
+UNIV_INLINE
+void
+rw_lock_s_unlock_func(
+/*==================*/
+ rw_lock_t* lock /* in: rw-lock */
+#ifdef UNIV_SYNC_DEBUG
+ ,ulint pass /* in: pass value; != 0, if the lock may have
+ been passed to another thread to unlock */
+#endif
+ );
+/***********************************************************************
+Releases a shared mode lock. */
+
+#ifdef UNIV_SYNC_DEBUG
+#define rw_lock_s_unlock(L) rw_lock_s_unlock_func(L, 0)
+#else
+#define rw_lock_s_unlock(L) rw_lock_s_unlock_func(L)
+#endif
+/***********************************************************************
+Releases a shared mode lock. */
+
+#ifdef UNIV_SYNC_DEBUG
+#define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(L, P)
+#else
+#define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(L)
+#endif
+/******************************************************************
+NOTE! The following macro should be used in rw x-locking, not the
+corresponding function. */
+
+#ifdef UNIV_SYNC_DEBUG
+#define rw_lock_x_lock(M) rw_lock_x_lock_func(\
+ (M), 0, __FILE__, __LINE__)
+#else
+#define rw_lock_x_lock(M) rw_lock_x_lock_func(M, 0)
+#endif
+/******************************************************************
+NOTE! The following macro should be used in rw x-locking, not the
+corresponding function. */
+
+#ifdef UNIV_SYNC_DEBUG
+#define rw_lock_x_lock_gen(M, P) rw_lock_x_lock_func(\
+ (M), (P), __FILE__, __LINE__)
+#else
+#define rw_lock_x_lock_gen(M, P) rw_lock_x_lock_func(M, P)
+#endif
+/******************************************************************
+NOTE! The following macros should be used in rw x-locking, not the
+corresponding function. */
+
+#ifdef UNIV_SYNC_DEBUG
+#define rw_lock_x_lock_nowait(M) rw_lock_x_lock_func_nowait(\
+ (M), __FILE__, __LINE__)
+#else
+#define rw_lock_x_lock_nowait(M) rw_lock_x_lock_func_nowait(M)
+#endif
+/**********************************************************************
+NOTE! Use the corresponding macro, not this function directly! Lock an
+rw-lock in exclusive mode for the current thread. If the rw-lock is locked
+in shared or exclusive mode, or there is an exclusive lock request waiting,
+the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
+for the lock, before suspending the thread. If the same thread has an x-lock
+on the rw-lock, locking succeeds, with the following exception: if pass != 0,
+only a single x-lock may be taken on the lock. NOTE: If the same thread has
+an s-lock, locking does not succeed! */
+
+void
+rw_lock_x_lock_func(
+/*================*/
+ rw_lock_t* lock, /* in: pointer to rw-lock */
+ ulint pass /* in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ #ifdef UNIV_SYNC_DEBUG
+ ,char* file_name, /* in: file name where lock requested */
+ ulint line /* in: line where requested */
+ #endif
+);
+/**********************************************************************
+Releases an exclusive mode lock. */
+UNIV_INLINE
+void
+rw_lock_x_unlock_func(
+/*==================*/
+ rw_lock_t* lock /* in: rw-lock */
+#ifdef UNIV_SYNC_DEBUG
+ ,ulint pass /* in: pass value; != 0, if the lock may have
+ been passed to another thread to unlock */
+#endif
+ );
+/***********************************************************************
+Releases an exclusive mode lock. */
+
+#ifdef UNIV_SYNC_DEBUG
+#define rw_lock_x_unlock(L) rw_lock_x_unlock_func(L, 0)
+#else
+#define rw_lock_x_unlock(L) rw_lock_x_unlock_func(L)
+#endif
+/***********************************************************************
+Releases an exclusive mode lock. */
+
+#ifdef UNIV_SYNC_DEBUG
+#define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(L, P)
+#else
+#define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(L)
+#endif
+/**********************************************************************
+Low-level function which locks an rw-lock in s-mode when we know that it
+is possible and no one else is currently accessing the rw-lock structure.
+Then we can do the locking without reserving the mutex. */
+UNIV_INLINE
+void
+rw_lock_s_lock_direct(
+/*==================*/
+ rw_lock_t* lock /* in: pointer to rw-lock */
+ #ifdef UNIV_SYNC_DEBUG
+ ,char* file_name, /* in: file name where lock requested */
+ ulint line /* in: line where requested */
+ #endif
+);
+/**********************************************************************
+Low-level function which locks an rw-lock in x-mode when we know that it
+is not locked and no one else is currently accessing the rw-lock structure.
+Then we can do the locking without reserving the mutex. */
+UNIV_INLINE
+void
+rw_lock_x_lock_direct(
+/*==================*/
+ rw_lock_t* lock /* in: pointer to rw-lock */
+ #ifdef UNIV_SYNC_DEBUG
+ ,char* file_name, /* in: file name where lock requested */
+ ulint line /* in: line where requested */
+ #endif
+);
+/**********************************************************************
+This function is used in the insert buffer to move the ownership of an
+x-latch on a buffer frame to the current thread. The x-latch was set by
+the buffer read operation and it protected the buffer frame while the
+read was done. The ownership is moved because we want that the current
+thread is able to acquire a second x-latch which is stored in an mtr.
+This, in turn, is needed to pass the debug checks of index page
+operations. */
+
+void
+rw_lock_x_lock_move_ownership(
+/*==========================*/
+ rw_lock_t* lock); /* in: lock which was x-locked in the
+ buffer read */
+/**********************************************************************
+Releases a shared mode lock when we know there are no waiters and no one
+else will access the lock during the time this function is executed. */
+UNIV_INLINE
+void
+rw_lock_s_unlock_direct(
+/*====================*/
+ rw_lock_t* lock); /* in: rw-lock */
+/**********************************************************************
+Releases an exclusive mode lock when we know there are no waiters, and
+no one else will access the lock during the time this function is executed. */
+UNIV_INLINE
+void
+rw_lock_x_unlock_direct(
+/*====================*/
+ rw_lock_t* lock); /* in: rw-lock */
+/**********************************************************************
+Sets the rw-lock latching level field. */
+
+void
+rw_lock_set_level(
+/*==============*/
+ rw_lock_t* lock, /* in: rw-lock */
+ ulint level); /* in: level */
+/**********************************************************************
+Returns the value of writer_count for the lock. Does not reserve the lock
+mutex, so the caller must be sure it is not changed during the call. */
+UNIV_INLINE
+ulint
+rw_lock_get_x_lock_count(
+/*=====================*/
+ /* out: value of writer_count */
+ rw_lock_t* lock); /* in: rw-lock */
+/**********************************************************************
+Checks if the thread has locked the rw-lock in the specified mode, with
+the pass value == 0. */
+
+ibool
+rw_lock_own(
+/*========*/
+ rw_lock_t* lock, /* in: rw-lock */
+ ulint lock_type); /* in: lock type */
+/**********************************************************************
+Checks if somebody has locked the rw-lock in the specified mode. */
+
+ibool
+rw_lock_is_locked(
+/*==============*/
+ rw_lock_t* lock, /* in: rw-lock */
+ ulint lock_type); /* in: lock type: RW_LOCK_SHARED,
+ RW_LOCK_EX */
+/*******************************************************************
+Prints debug info of an rw-lock. */
+
+void
+rw_lock_print(
+/*==========*/
+ rw_lock_t* lock); /* in: rw-lock */
+/*******************************************************************
+Prints debug info of currently locked rw-locks. */
+
+void
+rw_lock_list_print_info(void);
+/*=========================*/
+/*******************************************************************
+Returns the number of currently locked rw-locks.
+Works only in the debug version. */
+
+ulint
+rw_lock_n_locked(void);
+/*==================*/
+
+/*#####################################################################*/
+
+/**********************************************************************
+Acquires the debug mutex. We cannot use the mutex defined in sync0sync,
+because the debug mutex is also acquired in sync0arr while holding the OS
+mutex protecting the sync array, and the ordinary mutex_enter might
+recursively call routines in sync0arr, leading to a deadlock on the OS
+mutex. */
+
+void
+rw_lock_debug_mutex_enter(void);
+/*==========================*/
+/**********************************************************************
+Releases the debug mutex. */
+
+void
+rw_lock_debug_mutex_exit(void);
+/*==========================*/
+/*************************************************************************
+Prints info of a debug struct. */
+
+void
+rw_lock_debug_print(
+/*================*/
+ rw_lock_debug_t* info); /* in: debug struct */
+
+
+#define RW_CNAME_LEN 8
+
+/* NOTE! The structure appears here only for the compiler to know its size.
+Do not use its fields directly! The structure used in the spin lock
+implementation of a read-write lock. Several threads may have a shared lock
+simultaneously in this lock, but only one writer may have an exclusive lock,
+in which case no shared locks are allowed. To prevent starvation of a writer
+blocked by readers, a writer may queue for the lock by setting the writer
+field. Then no new readers are allowed in. */
+
+struct rw_lock_struct {
+ ulint reader_count; /* Number of readers who have locked this
+ lock in the shared mode */
+ ulint writer; /* This field is set to RW_LOCK_EX if there
+ is a writer owning the lock (in exclusive
+ mode), RW_LOCK_WAIT_EX if a writer is
+ queueing for the lock, and
+ RW_LOCK_NOT_LOCKED, otherwise. */
+ os_thread_id_t writer_thread;
+ /* Thread id of a possible writer thread */
+ ulint writer_count; /* Number of times the same thread has
+ recursively locked the lock in the exclusive
+ mode */
+ mutex_t mutex; /* The mutex protecting rw_lock_struct */
+ ulint pass; /* Default value 0. This is set to some
+ value != 0 given by the caller of an x-lock
+ operation, if the x-lock is to be passed to
+ another thread to unlock (which happens in
+ asynchronous i/o). */
+ ulint waiters; /* This ulint is set to 1 if there are
+ waiters (readers or writers) in the global
+ wait array, waiting for this rw_lock.
+ Otherwise, = 0. */
+ ibool writer_is_wait_ex;
+ /* This is TRUE if the writer field is
+ RW_LOCK_WAIT_EX; this field is located far
+ from the memory update hotspot fields which
+ are at the start of this struct, thus we can
+ peek this field without causing much memory
+ bus traffic */
+ UT_LIST_NODE_T(rw_lock_t) list;
+ /* All allocated rw locks are put into a
+ list */
+ UT_LIST_BASE_NODE_T(rw_lock_debug_t) debug_list;
+ /* In the debug version: pointer to the debug
+ info list of the lock */
+ ulint level; /* Debug version: level in the global latching
+ order; default SYNC_LEVEL_NONE */
+ char cfile_name[RW_CNAME_LEN];
+ /* File name where lock created */
+ ulint cline; /* Line where created */
+ ulint magic_n;
+};
+
+#define RW_LOCK_MAGIC_N 22643
+
+/* The structure for storing debug info of an rw-lock */
+struct rw_lock_debug_struct {
+
+ os_thread_id_t thread_id; /* The thread id of the thread which
+ locked the rw-lock */
+ ulint pass; /* Pass value given in the lock operation */
+ ulint lock_type; /* Type of the lock: RW_LOCK_EX,
+ RW_LOCK_SHARED, RW_LOCK_WAIT_EX */
+ char* file_name; /* File name where the lock was obtained */
+ ulint line; /* Line where the rw-lock was locked */
+ UT_LIST_NODE_T(rw_lock_debug_t) list;
+ /* Debug structs are linked in a two-way
+ list */
+};
+
+#ifndef UNIV_NONINL
+#include "sync0rw.ic"
+#endif
+
+#endif
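
A short sketch of how a caller might use the macros declared above; the rw_lock_t object and the data it protects are assumptions of the example:

	rw_lock_t	lock;

	rw_lock_create(&lock);

	rw_lock_s_lock(&lock);		/* shared access */
	/* ... read the protected structure ... */
	rw_lock_s_unlock(&lock);

	rw_lock_x_lock(&lock);		/* exclusive access */
	/* ... modify the protected structure ... */
	rw_lock_x_unlock(&lock);

	rw_lock_free(&lock);	/* needed only if the memory block is freed */
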
diff --git a/innobase/include/sync0rw.ic b/innobase/include/sync0rw.ic
new file mode 100644
index 00000000000..11add13d2d0
--- /dev/null
+++ b/innobase/include/sync0rw.ic
@@ -0,0 +1,510 @@
+/******************************************************
+The read-write lock (for threads)
+
+(c) 1995 Innobase Oy
+
+Created 9/11/1995 Heikki Tuuri
+*******************************************************/
+
+/**********************************************************************
+Lock an rw-lock in shared mode for the current thread. If the rw-lock is
+locked in exclusive mode, or there is an exclusive lock request waiting,
+the function spins a preset time (controlled by SYNC_SPIN_ROUNDS),
+waiting for the lock before suspending the thread. */
+
+void
+rw_lock_s_lock_spin(
+/*================*/
+ rw_lock_t* lock /* in: pointer to rw-lock */
+ #ifdef UNIV_SYNC_DEBUG
+ ,ulint pass, /* in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ char* file_name, /* in: file name where lock requested */
+ ulint line /* in: line where requested */
+ #endif
+);
+/**********************************************************************
+Inserts the debug information for an rw-lock. */
+
+void
+rw_lock_add_debug_info(
+/*===================*/
+ rw_lock_t* lock, /* in: rw-lock */
+ ulint pass, /* in: pass value */
+ ulint lock_type, /* in: lock type */
+ char* file_name, /* in: file where requested */
+ ulint line); /* in: line where requested */
+/**********************************************************************
+Removes a debug information struct for an rw-lock. */
+
+void
+rw_lock_remove_debug_info(
+/*======================*/
+ rw_lock_t* lock, /* in: rw-lock */
+ ulint pass, /* in: pass value */
+ ulint lock_type); /* in: lock type */
+
+
+/************************************************************************
+Accessor functions for rw lock. */
+UNIV_INLINE
+ulint
+rw_lock_get_waiters(
+/*================*/
+ rw_lock_t* lock)
+{
+ return(lock->waiters);
+}
+UNIV_INLINE
+void
+rw_lock_set_waiters(
+/*================*/
+ rw_lock_t* lock,
+ ulint flag)
+{
+ lock->waiters = flag;
+}
+UNIV_INLINE
+ulint
+rw_lock_get_writer(
+/*===============*/
+ rw_lock_t* lock)
+{
+ return(lock->writer);
+}
+UNIV_INLINE
+void
+rw_lock_set_writer(
+/*===============*/
+ rw_lock_t* lock,
+ ulint flag)
+{
+ lock->writer = flag;
+}
+UNIV_INLINE
+ulint
+rw_lock_get_reader_count(
+/*=====================*/
+ rw_lock_t* lock)
+{
+ return(lock->reader_count);
+}
+UNIV_INLINE
+void
+rw_lock_set_reader_count(
+/*=====================*/
+ rw_lock_t* lock,
+ ulint count)
+{
+ lock->reader_count = count;
+}
+UNIV_INLINE
+mutex_t*
+rw_lock_get_mutex(
+/*==============*/
+ rw_lock_t* lock)
+{
+ return(&(lock->mutex));
+}
+
+/**********************************************************************
+Returns the value of writer_count for the lock. Does not reserve the lock
+mutex, so the caller must be sure it is not changed during the call. */
+UNIV_INLINE
+ulint
+rw_lock_get_x_lock_count(
+/*=====================*/
+ /* out: value of writer_count */
+ rw_lock_t* lock) /* in: rw-lock */
+{
+ return(lock->writer_count);
+}
+
+/**********************************************************************
+Low-level function which tries to lock an rw-lock in s-mode. Performs no
+spinning. */
+UNIV_INLINE
+ibool
+rw_lock_s_lock_low(
+/*===============*/
+ /* out: TRUE if success */
+ rw_lock_t* lock /* in: pointer to rw-lock */
+ #ifdef UNIV_SYNC_DEBUG
+ ,ulint pass, /* in: pass value; != 0, if the lock will be
+ passed to another thread to unlock */
+ char* file_name, /* in: file name where lock requested */
+ ulint line /* in: line where requested */
+ #endif
+)
+{
+ ut_ad(mutex_own(rw_lock_get_mutex(lock)));
+
+ /* Check if the writer field is free */
+
+ if (lock->writer == RW_LOCK_NOT_LOCKED) {
+ /* Set the shared lock by incrementing the reader count */
+ lock->reader_count++;
+
+ #ifdef UNIV_SYNC_DEBUG
+ rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name,
+ line);
+ #endif
+
+ return(TRUE); /* locking succeeded */
+ }
+
+ return(FALSE); /* locking did not succeed */
+}
+
+/**********************************************************************
+Low-level function which locks an rw-lock in s-mode when we know that it
+is possible and no one else is currently accessing the rw-lock structure.
+Then we can do the locking without reserving the mutex. */
+UNIV_INLINE
+void
+rw_lock_s_lock_direct(
+/*==================*/
+ rw_lock_t* lock /* in: pointer to rw-lock */
+ #ifdef UNIV_SYNC_DEBUG
+ ,char* file_name, /* in: file name where lock requested */
+ ulint line /* in: line where requested */
+ #endif
+)
+{
+ ut_ad(lock->writer == RW_LOCK_NOT_LOCKED);
+ ut_ad(rw_lock_get_reader_count(lock) == 0);
+
+ /* Set the shared lock by incrementing the reader count */
+ lock->reader_count++;
+
+ #ifdef UNIV_SYNC_DEBUG
+ rw_lock_add_debug_info(lock, 0, RW_LOCK_SHARED, file_name, line);
+ #endif
+}
+
+/**********************************************************************
+Low-level function which locks an rw-lock in x-mode when we know that it
+is not locked and no one else is currently accessing the rw-lock structure.
+Then we can do the locking without reserving the mutex. */
+UNIV_INLINE
+void
+rw_lock_x_lock_direct(
+/*==================*/
+ rw_lock_t* lock /* in: pointer to rw-lock */
+ #ifdef UNIV_SYNC_DEBUG
+ ,char* file_name, /* in: file name where lock requested */
+ ulint line /* in: line where requested */
+ #endif
+)
+{
+ ut_ad(rw_lock_validate(lock));
+ ut_ad(rw_lock_get_reader_count(lock) == 0);
+ ut_ad(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED);
+
+ rw_lock_set_writer(lock, RW_LOCK_EX);
+ lock->writer_thread = os_thread_get_curr_id();
+ lock->writer_count++;
+ lock->pass = 0;
+
+ #ifdef UNIV_SYNC_DEBUG
+ rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
+ #endif
+}
+
+/**********************************************************************
+NOTE! Use the corresponding macro, not this function directly! Lock an
+rw-lock in shared mode for the current thread. If the rw-lock is locked
+in exclusive mode, or there is an exclusive lock request waiting, the
+function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting for
+the lock, before suspending the thread. */
+UNIV_INLINE
+void
+rw_lock_s_lock_func(
+/*================*/
+ rw_lock_t* lock /* in: pointer to rw-lock */
+ #ifdef UNIV_SYNC_DEBUG
+ ,ulint pass, /* in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ char* file_name, /* in: file name where lock requested */
+ ulint line /* in: line where requested */
+ #endif
+)
+{
+ /* NOTE: As we do not know the thread ids for threads which have
+ s-locked a latch, and s-lockers will be served only after waiting
+ x-lock requests have been fulfilled, then if this thread already
+ owns an s-lock here, it may end up in a deadlock with another thread
+ which requests an x-lock here. Therefore, we will forbid recursive
+ s-locking of a latch: the following assert will warn the programmer
+of the possibility of this kind of deadlock. If we want to implement
+ safe recursive s-locking, we should keep in a list the thread ids of
+ the threads which have s-locked a latch. This would use some CPU
+ time. */
+
+ ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); /* see NOTE above */
+
+ mutex_enter(rw_lock_get_mutex(lock));
+
+ if (TRUE == rw_lock_s_lock_low(lock
+ #ifdef UNIV_SYNC_DEBUG
+ ,pass, file_name, line
+ #endif
+ )) {
+ mutex_exit(rw_lock_get_mutex(lock));
+
+ return; /* Success */
+ } else {
+ /* Did not succeed, try spin wait */
+ mutex_exit(rw_lock_get_mutex(lock));
+
+ rw_lock_s_lock_spin(lock
+ #ifdef UNIV_SYNC_DEBUG
+ ,pass, file_name, line
+ #endif
+ );
+ return;
+ }
+}
+
+/**********************************************************************
+NOTE! Use the corresponding macro, not this function directly! Lock an
+rw-lock in shared mode for the current thread if the lock can be acquired
+immediately. */
+UNIV_INLINE
+ibool
+rw_lock_s_lock_func_nowait(
+/*=======================*/
+ /* out: TRUE if success */
+ rw_lock_t* lock /* in: pointer to rw-lock */
+ #ifdef UNIV_SYNC_DEBUG
+ ,char* file_name, /* in: file name where lock requested */
+ ulint line /* in: line where requested */
+ #endif
+)
+{
+ ibool success = FALSE;
+
+ mutex_enter(rw_lock_get_mutex(lock));
+
+ if (lock->writer == RW_LOCK_NOT_LOCKED) {
+ /* Set the shared lock by incrementing the reader count */
+ lock->reader_count++;
+
+ #ifdef UNIV_SYNC_DEBUG
+ rw_lock_add_debug_info(lock, 0, RW_LOCK_SHARED, file_name,
+ line);
+ #endif
+
+ success = TRUE;
+ }
+
+ mutex_exit(rw_lock_get_mutex(lock));
+
+ return(success);
+}
+
+/**********************************************************************
+NOTE! Use the corresponding macro, not this function directly! Lock an
+rw-lock in exclusive mode for the current thread if the lock can be
+obtained immediately. */
+UNIV_INLINE
+ibool
+rw_lock_x_lock_func_nowait(
+/*=======================*/
+ /* out: TRUE if success */
+ rw_lock_t* lock /* in: pointer to rw-lock */
+ #ifdef UNIV_SYNC_DEBUG
+ ,char* file_name, /* in: file name where lock requested */
+ ulint line /* in: line where requested */
+ #endif
+)
+{
+ ibool success = FALSE;
+
+ mutex_enter(rw_lock_get_mutex(lock));
+
+ if ((rw_lock_get_reader_count(lock) == 0)
+ && ((rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED)
+ || ((rw_lock_get_writer(lock) == RW_LOCK_EX)
+ && (lock->pass == 0)
+ && (lock->writer_thread == os_thread_get_curr_id())))) {
+
+ rw_lock_set_writer(lock, RW_LOCK_EX);
+ lock->writer_thread = os_thread_get_curr_id();
+ lock->writer_count++;
+ lock->pass = 0;
+
+ #ifdef UNIV_SYNC_DEBUG
+ rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
+ #endif
+
+ success = TRUE;
+ }
+
+ mutex_exit(rw_lock_get_mutex(lock));
+
+ ut_ad(rw_lock_validate(lock));
+
+ return(success);
+}
+
+/**********************************************************************
+Releases a shared mode lock. */
+UNIV_INLINE
+void
+rw_lock_s_unlock_func(
+/*==================*/
+ rw_lock_t* lock /* in: rw-lock */
+#ifdef UNIV_SYNC_DEBUG
+ ,ulint pass /* in: pass value; != 0, if the lock may have
+ been passed to another thread to unlock */
+#endif
+ )
+{
+ mutex_t* mutex = &(lock->mutex);
+ ibool sg = FALSE;
+
+ /* Acquire the mutex protecting the rw-lock fields */
+ mutex_enter(mutex);
+
+ /* Reset the shared lock by decrementing the reader count */
+
+ ut_ad(lock->reader_count > 0);
+ lock->reader_count--;
+
+ #ifdef UNIV_SYNC_DEBUG
+ rw_lock_remove_debug_info(lock, pass, RW_LOCK_SHARED);
+ #endif
+
+ /* If there may be waiters and this was the last s-lock,
+ signal the object */
+
+ if (lock->waiters && (lock->reader_count == 0)) {
+ sg = TRUE;
+
+ rw_lock_set_waiters(lock, 0);
+ }
+
+ mutex_exit(mutex);
+
+ if (sg == TRUE) {
+ sync_array_signal_object(sync_primary_wait_array, lock);
+ }
+
+ ut_ad(rw_lock_validate(lock));
+
+#ifdef UNIV_SYNC_PERF_STAT
+ rw_s_exit_count++;
+#endif
+}
+
+/**********************************************************************
+Releases a shared mode lock when we know there are no waiters and no one
+else will access the lock during the time this function is executed. */
+UNIV_INLINE
+void
+rw_lock_s_unlock_direct(
+/*====================*/
+ rw_lock_t* lock) /* in: rw-lock */
+{
+ /* Reset the shared lock by decrementing the reader count */
+
+ ut_ad(lock->reader_count > 0);
+
+ lock->reader_count--;
+
+ #ifdef UNIV_SYNC_DEBUG
+ rw_lock_remove_debug_info(lock, 0, RW_LOCK_SHARED);
+ #endif
+
+ ut_ad(!lock->waiters);
+ ut_ad(rw_lock_validate(lock));
+#ifdef UNIV_SYNC_PERF_STAT
+ rw_s_exit_count++;
+#endif
+}
+
+/**********************************************************************
+Releases an exclusive mode lock. */
+UNIV_INLINE
+void
+rw_lock_x_unlock_func(
+/*==================*/
+ rw_lock_t* lock /* in: rw-lock */
+#ifdef UNIV_SYNC_DEBUG
+ ,ulint pass /* in: pass value; != 0, if the lock may have
+ been passed to another thread to unlock */
+#endif
+ )
+{
+ ibool sg = FALSE;
+
+ /* Acquire the mutex protecting the rw-lock fields */
+ mutex_enter(&(lock->mutex));
+
+ /* Reset the exclusive lock if this thread no longer has an x-mode
+ lock */
+
+ ut_ad(lock->writer_count > 0);
+
+ lock->writer_count--;
+
+ if (lock->writer_count == 0) {
+ rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
+ }
+
+ #ifdef UNIV_SYNC_DEBUG
+ rw_lock_remove_debug_info(lock, pass, RW_LOCK_EX);
+ #endif
+
+ /* If there may be waiters, signal the lock */
+ if (lock->waiters && (lock->writer_count == 0)) {
+
+ sg = TRUE;
+ rw_lock_set_waiters(lock, 0);
+ }
+
+ mutex_exit(&(lock->mutex));
+
+ if (sg == TRUE) {
+ sync_array_signal_object(sync_primary_wait_array, lock);
+ }
+
+ ut_ad(rw_lock_validate(lock));
+
+#ifdef UNIV_SYNC_PERF_STAT
+ rw_x_exit_count++;
+#endif
+}
+
+/**********************************************************************
+Releases an exclusive mode lock when we know there are no waiters, and
+no one else will access the lock during the time this function is executed. */
+UNIV_INLINE
+void
+rw_lock_x_unlock_direct(
+/*====================*/
+ rw_lock_t* lock) /* in: rw-lock */
+{
+ /* Reset the exclusive lock if this thread no longer has an x-mode
+ lock */
+
+ ut_ad(lock->writer_count > 0);
+
+ lock->writer_count--;
+
+ if (lock->writer_count == 0) {
+ rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
+ }
+
+ #ifdef UNIV_SYNC_DEBUG
+ rw_lock_remove_debug_info(lock, 0, RW_LOCK_EX);
+ #endif
+
+ ut_ad(!lock->waiters);
+ ut_ad(rw_lock_validate(lock));
+
+#ifdef UNIV_SYNC_PERF_STAT
+ rw_x_exit_count++;
+#endif
+}
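
The nowait variants above return TRUE only if the latch could be taken without waiting; a caller that must not block might use them as in this sketch (lock is the same hypothetical rw_lock_t as in the earlier example):

	if (rw_lock_x_lock_nowait(&lock)) {
		/* got the x-latch immediately */
		/* ... modify the protected structure ... */
		rw_lock_x_unlock(&lock);
	} else {
		/* could not get the latch without waiting:
		retry later or take another code path */
	}
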
diff --git a/innobase/include/sync0sync.h b/innobase/include/sync0sync.h
new file mode 100644
index 00000000000..87c4628d2e4
--- /dev/null
+++ b/innobase/include/sync0sync.h
@@ -0,0 +1,497 @@
+/******************************************************
+Mutex, the basic synchronization primitive
+
+(c) 1995 Innobase Oy
+
+Created 9/5/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef sync0sync_h
+#define sync0sync_h
+
+#include "univ.i"
+#include "sync0types.h"
+#include "ut0lst.h"
+#include "ut0mem.h"
+#include "os0thread.h"
+#include "os0sync.h"
+#include "sync0arr.h"
+
+/**********************************************************************
+Initializes the synchronization data structures. */
+
+void
+sync_init(void);
+/*===========*/
+/**********************************************************************
+Frees the resources in synchronization data structures. */
+
+void
+sync_close(void);
+/*===========*/
+/**********************************************************************
+Creates, or rather, initializes a mutex object to a specified memory
+location (which must be appropriately aligned). The mutex is initialized
+in the reset state. Explicit freeing of the mutex with mutex_free is
+necessary only if the memory block containing it is freed. */
+
+
+#define mutex_create(M) mutex_create_func((M), __FILE__, __LINE__)
+/*===================*/
+/**********************************************************************
+Creates, or rather, initializes a mutex object in a specified memory
+location (which must be appropriately aligned). The mutex is initialized
+in the reset state. Explicit freeing of the mutex with mutex_free is
+necessary only if the memory block containing it is freed. */
+
+void
+mutex_create_func(
+/*==============*/
+ mutex_t* mutex, /* in: pointer to memory */
+ char* cfile_name, /* in: file name where created */
+ ulint cline); /* in: file line where created */
+/**********************************************************************
+Calling this function is obligatory only if the memory buffer containing
+the mutex is freed. Removes a mutex object from the mutex list. The mutex
+is checked to be in the reset state. */
+
+void
+mutex_free(
+/*=======*/
+ mutex_t* mutex); /* in: mutex */
+/******************************************************************
+NOTE! The following macro should be used in mutex locking, not the
+corresponding function. */
+
+#ifdef UNIV_SYNC_DEBUG
+#define mutex_enter(M) mutex_enter_func((M), __FILE__, __LINE__)
+#else
+#define mutex_enter(M) mutex_enter_func(M)
+#endif
+/******************************************************************
+NOTE! The following macro should be used in mutex locking, not the
+corresponding function. */
+
+/* NOTE! currently same as mutex_enter! */
+
+#ifdef UNIV_SYNC_DEBUG
+#define mutex_enter_fast(M) mutex_enter_func((M), __FILE__, __LINE__)
+#else
+#define mutex_enter_fast(M) mutex_enter_func(M)
+#endif
+
+#define mutex_enter_fast_func mutex_enter_func
+/**********************************************************************
+NOTE! Use the corresponding macro in the header file, not this function
+directly. Locks a mutex for the current thread. If the mutex is reserved
+the function spins a preset time (controlled by SYNC_SPIN_ROUNDS) waiting
+for the mutex before suspending the thread. */
+UNIV_INLINE
+void
+mutex_enter_func(
+/*=============*/
+ mutex_t* mutex /* in: pointer to mutex */
+ #ifdef UNIV_SYNC_DEBUG
+ ,char* file_name, /* in: file name where locked */
+ ulint line /* in: line where locked */
+ #endif
+ );
+/************************************************************************
+Tries to lock the mutex for the current thread. If the lock is not acquired
+immediately, returns with return value 1. */
+
+ulint
+mutex_enter_nowait(
+/*===============*/
+ /* out: 0 if succeed, 1 if not */
+ mutex_t* mutex); /* in: pointer to mutex */
+/**********************************************************************
+Unlocks a mutex owned by the current thread. */
+UNIV_INLINE
+void
+mutex_exit(
+/*=======*/
+ mutex_t* mutex); /* in: pointer to mutex */
+/**********************************************************************
+Returns TRUE if no mutex or rw-lock is currently locked.
+Works only in the debug version. */
+
+ibool
+sync_all_freed(void);
+/*================*/
+/*#####################################################################
+FUNCTION PROTOTYPES FOR DEBUGGING */
+/***********************************************************************
+Prints wait info of the sync system. */
+
+void
+sync_print_wait_info(void);
+/*======================*/
+/***********************************************************************
+Prints info of the sync system. */
+
+void
+sync_print(void);
+/*============*/
+/**********************************************************************
+Checks that the mutex has been initialized. */
+
+ibool
+mutex_validate(
+/*===========*/
+ mutex_t* mutex);
+/**********************************************************************
+Sets the mutex latching level field. */
+
+void
+mutex_set_level(
+/*============*/
+ mutex_t* mutex, /* in: mutex */
+ ulint level); /* in: level */
+/**********************************************************************
+Adds a latch and its level in the thread level array. Allocates the memory
+for the array if called for the first time for this OS thread. Makes the checks
+against other latch levels stored in the array for this thread. */
+
+void
+sync_thread_add_level(
+/*==================*/
+ void* latch, /* in: pointer to a mutex or an rw-lock */
+ ulint level); /* in: level in the latching order; if SYNC_LEVEL_NONE,
+ nothing is done */
+/**********************************************************************
+Removes a latch from the thread level array if it is found there. */
+
+ibool
+sync_thread_reset_level(
+/*====================*/
+			/* out: TRUE if found in the array; it is no error
+ if the latch is not found, as we presently are not
+ able to determine the level for every latch
+ reservation the program does */
+ void* latch); /* in: pointer to a mutex or an rw-lock */
+/**********************************************************************
+Checks that the level array for the current thread is empty. */
+
+ibool
+sync_thread_levels_empty(void);
+/*==========================*/
+ /* out: TRUE if empty */
+/**********************************************************************
+Checks that the level array for the current thread is empty. */
+
+ibool
+sync_thread_levels_empty_gen(
+/*=========================*/
+ /* out: TRUE if empty except the
+ exceptions specified below */
+ ibool dict_mutex_allowed); /* in: TRUE if dictionary mutex is
+ allowed to be owned by the thread,
+ also purge_is_running mutex is
+ allowed */
+/**********************************************************************
+Checks that the current thread owns the mutex. Works only
+in the debug version. */
+
+ibool
+mutex_own(
+/*======*/
+ /* out: TRUE if owns */
+ mutex_t* mutex); /* in: mutex */
+/**********************************************************************
+Gets the debug information for a reserved mutex. */
+
+void
+mutex_get_debug_info(
+/*=================*/
+ mutex_t* mutex, /* in: mutex */
+ char** file_name, /* out: file where requested */
+ ulint* line, /* out: line where requested */
+ os_thread_id_t* thread_id); /* out: id of the thread which owns
+ the mutex */
+/**********************************************************************
+Counts currently reserved mutexes. Works only in the debug version. */
+
+ulint
+mutex_n_reserved(void);
+/*==================*/
+/**********************************************************************
+Prints debug info of currently reserved mutexes. */
+
+void
+mutex_list_print_info(void);
+/*========================*/
+/**********************************************************************
+NOT to be used outside this module except in debugging! Gets the value
+of the lock word. */
+UNIV_INLINE
+ulint
+mutex_get_lock_word(
+/*================*/
+ mutex_t* mutex); /* in: mutex */
+/**********************************************************************
+NOT to be used outside this module except in debugging! Gets the waiters
+field in a mutex. */
+UNIV_INLINE
+ulint
+mutex_get_waiters(
+/*==============*/
+ /* out: value of the waiters field */
+ mutex_t* mutex); /* in: mutex */
+/**********************************************************************
+Implements the memory barrier operation which creates a serialization point in
+the instruction flow. This is needed because the Pentium may speculatively
+execute reads before preceding writes are committed. We could also use here
+any LOCKed instruction (see Intel Software Dev. Manual, Vol. 3). */
+
+void
+mutex_fence(void);
+/*=============*/
+
+/*
+ LATCHING ORDER WITHIN THE DATABASE
+ ==================================
+
+The mutex or latch in the central memory object, for instance, a rollback
+segment object, must be acquired before acquiring the latch or latches to
+the corresponding file data structure. In the latching order below, these
+file page object latches are placed immediately below the corresponding
+central memory object latch or mutex.
+
+Synchronization object Notes
+---------------------- -----
+
+Dictionary mutex If we have a pointer to a dictionary
+| object, e.g., a table, it can be
+| accessed without reserving the
+| dictionary mutex. We must have a
+| reservation, a memoryfix, to the
+| appropriate table object in this case,
+| and the table must be explicitly
+| released later.
+V
+Dictionary header
+|
+V
+Secondary index tree latch The tree latch protects also all
+| the B-tree non-leaf pages. These
+V can be read with the page only
+Secondary index non-leaf bufferfixed to save CPU time,
+| no s-latch is needed on the page.
+| Modification of a page requires an
+| x-latch on the page, however. If a
+| thread owns an x-latch to the tree,
+| it is allowed to latch non-leaf pages
+| even after it has acquired the fsp
+| latch.
+V
+Secondary index leaf The latch on the secondary index leaf
+| can be kept while accessing the
+| clustered index, to save CPU time.
+V
+Clustered index tree latch To increase concurrency, the tree
+| latch is usually released when the
+| leaf page latch has been acquired.
+V
+Clustered index non-leaf
+|
+V
+Clustered index leaf
+|
+V
+Transaction system header
+|
+V
+Transaction undo mutex The undo log entry must be written
+| before any index page is modified.
+| Transaction undo mutex is for the undo
+| logs the analogue of the tree latch
+| for a B-tree. If a thread has the
+| trx undo mutex reserved, it is allowed
+| to latch the undo log pages in any
+| order, and also after it has acquired
+| the fsp latch.
+V
+Rollback segment mutex The rollback segment mutex must be
+| reserved, if, e.g., a new page must
+| be added to an undo log. The rollback
+| segment and the undo logs in its
+| history list can be seen as an
+| analogue of a B-tree, and the latches
+| reserved similarly, using a version of
+| lock-coupling. If an undo log must be
+| extended by a page when inserting an
+| undo log record, this corresponds to
+| a pessimistic insert in a B-tree.
+V
+Rollback segment header
+|
+V
+Purge system latch
+|
+V
+Undo log pages If a thread owns the trx undo mutex,
+| or for a log in the history list, the
+| rseg mutex, it is allowed to latch
+| undo log pages in any order, and even
+| after it has acquired the fsp latch.
+| If a thread does not have the
+| appropriate mutex, it is allowed to
+| latch only a single undo log page in
+| a mini-transaction.
+V
+File space management latch If a mini-transaction must allocate
+| several file pages, it can do that,
+| because it keeps the x-latch to the
+| file space management in its memo.
+V
+File system pages
+|
+V
+Kernel mutex If a kernel operation needs a file
+| page allocation, it must reserve the
+| fsp x-latch before acquiring the kernel
+| mutex.
+V
+Search system mutex
+|
+V
+Buffer pool mutex
+|
+V
+Log mutex
+|
+V
+Any other latch
+|
+V
+Memory pool mutex */
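+
+/* Editorial illustration, not part of the original header: a minimal sketch
+of how the latching-order debug machinery declared above is meant to be used.
+It assumes mutex_create(), presumably declared in the earlier part of this
+header, and the mutex_enter()/mutex_exit() pair defined in sync0sync.ic;
+example_mutex is a made-up name. Under UNIV_SYNC_DEBUG, entering the mutex
+presumably records the latch via sync_thread_add_level(), which checks the
+requested level against the levels of the latches the thread already holds
+(enforcing the descending order of the chart above), and mutex_exit() removes
+it again through sync_thread_reset_level().
+
+ mutex_t example_mutex;
+
+ mutex_create(&example_mutex);
+ mutex_set_level(&example_mutex, SYNC_ANY_LATCH);
+
+ mutex_enter(&example_mutex);
+ ... critical section ...
+ mutex_exit(&example_mutex);
+*/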
+
+/* Latching order levels */
+#define SYNC_NO_ORDER_CHECK 3000 /* this can be used to suppress
+ latching order checking */
+#define SYNC_LEVEL_NONE 2000 /* default: level not defined */
+#define SYNC_DICT 1000
+#define SYNC_PURGE_IS_RUNNING 997
+#define SYNC_DICT_HEADER 995
+#define SYNC_IBUF_HEADER 914
+#define SYNC_IBUF_PESS_INSERT_MUTEX 912
+#define SYNC_IBUF_MUTEX 910 /* ibuf mutex is really below
+ SYNC_FSP_PAGE: we assign a value this
+ high only to get the program to pass
+ the debug checks */
+/*-------------------------------*/
+#define SYNC_INDEX_TREE 900
+#define SYNC_TREE_NODE_NEW 892
+#define SYNC_TREE_NODE_FROM_HASH 891
+#define SYNC_TREE_NODE 890
+#define SYNC_PURGE_SYS 810
+#define SYNC_PURGE_LATCH 800
+#define SYNC_TRX_UNDO 700
+#define SYNC_RSEG 600
+#define SYNC_RSEG_HEADER_NEW 591
+#define SYNC_RSEG_HEADER 590
+#define SYNC_TRX_UNDO_PAGE 570
+#define SYNC_FSP 400
+#define SYNC_FSP_PAGE 395
+/*------------------------------------- Insert buffer headers */
+/*------------------------------------- ibuf_mutex */
+/*------------------------------------- Insert buffer trees */
+#define SYNC_IBUF_BITMAP_MUTEX 351
+#define SYNC_IBUF_BITMAP 350
+/*-------------------------------*/
+#define SYNC_KERNEL 300
+#define SYNC_REC_LOCK 299
+#define SYNC_TRX_LOCK_HEAP 298
+#define SYNC_TRX_SYS_HEADER 290
+#define SYNC_LOG 170
+#define SYNC_RECV 168
+#define SYNC_SEARCH_SYS 160 /* NOTE that if we have a memory
+ heap that can be extended to the
+ buffer pool, its logical level is
+ SYNC_SEARCH_SYS, as memory allocation
+ can call routines there! Otherwise
+ the level is SYNC_MEM_HASH. */
+#define SYNC_BUF_POOL 150
+#define SYNC_BUF_BLOCK 149
+#define SYNC_ANY_LATCH 135
+#define SYNC_MEM_HASH 131
+#define SYNC_MEM_POOL 130
+
+/* Codes used to designate lock operations */
+#define RW_LOCK_NOT_LOCKED 350
+#define RW_LOCK_EX 351
+#define RW_LOCK_EXCLUSIVE 351
+#define RW_LOCK_SHARED 352
+#define RW_LOCK_WAIT_EX 353
+#define SYNC_MUTEX 354
+
+#define MUTEX_CNAME_LEN 8
+
+/* NOTE! The structure appears here only for the compiler to know its size.
+Do not use its fields directly! The structure used in the spin lock
+implementation of a mutual exclusion semaphore. */
+
+struct mutex_struct {
+ ulint lock_word; /* This ulint is the target of the atomic
+ test-and-set instruction in Win32 */
+#ifndef _WIN32
+ os_fast_mutex_t
+ os_fast_mutex; /* In other systems we use this OS mutex
+ in place of lock_word */
+#endif
+ ulint waiters; /* This ulint is set to 1 if there are (or
+ may be) threads waiting in the global wait
+ array for this mutex to be released.
+ Otherwise, this is 0. */
+ UT_LIST_NODE_T(mutex_t) list; /* All allocated mutexes are put into
+ a list. Pointers to the next and prev. */
+ os_thread_id_t thread_id; /* Debug version: The thread id of the
+ thread which locked the mutex. */
+ char* file_name; /* Debug version: File name where the mutex
+ was locked */
+ ulint line; /* Debug version: Line where the mutex was
+ locked */
+ ulint level; /* Debug version: level in the global latching
+ order; default SYNC_LEVEL_NONE */
+ char cfile_name[MUTEX_CNAME_LEN];
+ /* File name where mutex created */
+ ulint cline; /* Line where created */
+ ulint magic_n;
+};
+
+#define MUTEX_MAGIC_N (ulint)979585
+
+/* The global array of wait cells for implementation of the database's own
+mutexes and read-write locks. Appears here for debugging purposes only! */
+
+extern sync_array_t* sync_primary_wait_array;
+
+/* Constant determining how long spin wait is continued before suspending
+the thread. A value of 600 rounds on a 1995-era 100 MHz Pentium seems to correspond
+to 20 microseconds. */
+
+#define SYNC_SPIN_ROUNDS srv_n_spin_wait_rounds
+
+#define SYNC_INFINITE_TIME ((ulint)(-1))
+
+/* Means that a timeout elapsed when waiting */
+
+#define SYNC_TIME_EXCEEDED (ulint)1
+
+/* The number of system calls made in this module. Intended for performance
+monitoring. */
+
+extern ulint mutex_system_call_count;
+extern ulint mutex_exit_count;
+
+/* Latching order checks start when this is set TRUE */
+extern ibool sync_order_checks_on;
+
+/* This variable is set to TRUE when sync_init is called */
+extern ibool sync_initialized;
+
+#ifndef UNIV_NONINL
+#include "sync0sync.ic"
+#endif
+
+#endif
diff --git a/innobase/include/sync0sync.ic b/innobase/include/sync0sync.ic
new file mode 100644
index 00000000000..a937ac5d579
--- /dev/null
+++ b/innobase/include/sync0sync.ic
@@ -0,0 +1,226 @@
+/******************************************************
+Mutex, the basic synchronization primitive
+
+(c) 1995 Innobase Oy
+
+Created 9/5/1995 Heikki Tuuri
+*******************************************************/
+
+/**********************************************************************
+Sets the waiters field in a mutex. */
+
+void
+mutex_set_waiters(
+/*==============*/
+ mutex_t* mutex, /* in: mutex */
+ ulint n); /* in: value to set */
+/**********************************************************************
+Reserves a mutex for the current thread. If the mutex is reserved, the
+function spins a preset time (controlled by SYNC_SPIN_ROUNDS) waiting
+for the mutex before suspending the thread. */
+
+void
+mutex_spin_wait(
+/*============*/
+ mutex_t* mutex /* in: pointer to mutex */
+
+ #ifdef UNIV_SYNC_DEBUG
+ ,char* file_name, /* in: file name where mutex requested */
+ ulint line /* in: line where requested */
+ #endif
+);
+/**********************************************************************
+Sets the debug information for a reserved mutex. */
+
+void
+mutex_set_debug_info(
+/*=================*/
+ mutex_t* mutex, /* in: mutex */
+ char* file_name, /* in: file where requested */
+ ulint line); /* in: line where requested */
+/**********************************************************************
+Releases the threads waiting in the primary wait array for this mutex. */
+
+void
+mutex_signal_object(
+/*================*/
+ mutex_t* mutex); /* in: mutex */
+
+/**********************************************************************
+Performs an atomic test-and-set instruction to the lock_word field of a
+mutex. */
+UNIV_INLINE
+ulint
+mutex_test_and_set(
+/*===============*/
+ /* out: the previous value of lock_word: 0 or
+ 1 */
+ mutex_t* mutex) /* in: mutex */
+{
+#ifdef _WIN32
+ ulint res;
+ ulint* lw; /* assembler code is used to ensure that
+ lock_word is loaded from memory */
+ ut_ad(mutex);
+ ut_ad(sizeof(ulint) == 4);
+
+ lw = &(mutex->lock_word);
+
+ __asm MOV ECX, lw
+ __asm MOV EDX, 1
+ __asm XCHG EDX, DWORD PTR [ECX]
+ __asm MOV res, EDX
+
+ /* The fence below would prevent this thread from reading the data
+ structure protected by the mutex before the test-and-set operation is
+ committed, but the fence is apparently not needed:
+
+ In a posting to comp.arch newsgroup (August 10, 1997) Andy Glew said
+ that in P6 a LOCKed instruction like XCHG establishes a fence with
+ respect to memory reads and writes and thus an explicit fence is not
+ needed. In P5 he seemed to agree with a previous newsgroup poster that
+ LOCKed instructions serialize all instruction execution, and,
+ consequently, also memory operations. This is confirmed in Intel
+ Software Dev. Manual, Vol. 3. */
+
+ /* mutex_fence(); */
+
+ return(res);
+#else
+ ibool ret;
+
+ ret = os_fast_mutex_trylock(&(mutex->os_fast_mutex));
+
+ if (ret == 0) {
+ mutex->lock_word = 1;
+ }
+
+ return(ret);
+#endif
+}
+
+/**********************************************************************
+Performs a reset instruction to the lock_word field of a mutex. This
+instruction also serializes memory operations to the program order. */
+UNIV_INLINE
+void
+mutex_reset_lock_word(
+/*==================*/
+ mutex_t* mutex) /* in: mutex */
+{
+#ifdef _WIN32
+ ulint* lw; /* assembler code is used to ensure that
+ lock_word is loaded from memory */
+ ut_ad(mutex);
+
+ lw = &(mutex->lock_word);
+
+ __asm MOV EDX, 0
+ __asm MOV ECX, lw
+ __asm XCHG EDX, DWORD PTR [ECX]
+#else
+ mutex->lock_word = 0;
+
+ os_fast_mutex_unlock(&(mutex->os_fast_mutex));
+#endif
+}
+
+/**********************************************************************
+Gets the value of the lock word. */
+UNIV_INLINE
+ulint
+mutex_get_lock_word(
+/*================*/
+ mutex_t* mutex) /* in: mutex */
+{
+ volatile ulint* ptr; /* declared volatile to ensure that
+ lock_word is loaded from memory */
+ ut_ad(mutex);
+
+ ptr = &(mutex->lock_word);
+
+ return(*ptr);
+}
+
+/**********************************************************************
+Gets the waiters field in a mutex. */
+UNIV_INLINE
+ulint
+mutex_get_waiters(
+/*==============*/
+ /* out: value of the waiters field */
+ mutex_t* mutex) /* in: mutex */
+{
+ volatile ulint* ptr; /* declared volatile to ensure that
+ the value is read from memory */
+ ut_ad(mutex);
+
+ ptr = &(mutex->waiters);
+
+ return(*ptr); /* Here we assume that the read of a single
+ word from memory is atomic */
+}
+
+/**********************************************************************
+Unlocks a mutex owned by the current thread. */
+UNIV_INLINE
+void
+mutex_exit(
+/*=======*/
+ mutex_t* mutex) /* in: pointer to mutex */
+{
+ ut_ad(mutex_own(mutex));
+
+#ifdef UNIV_SYNC_DEBUG
+ mutex->thread_id = ULINT_UNDEFINED;
+
+ sync_thread_reset_level(mutex);
+#endif
+ mutex_reset_lock_word(mutex);
+
+ if (mutex_get_waiters(mutex) != 0) {
+
+ mutex_signal_object(mutex);
+ }
+
+#ifdef UNIV_SYNC_PERF_STAT
+ mutex_exit_count++;
+#endif
+}
+
+/**********************************************************************
+Locks a mutex for the current thread. If the mutex is reserved, the function
+spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting for the mutex
+before suspending the thread. */
+UNIV_INLINE
+void
+mutex_enter_func(
+/*=============*/
+ mutex_t* mutex /* in: pointer to mutex */
+ #ifdef UNIV_SYNC_DEBUG
+ ,char* file_name, /* in: file name where locked */
+ ulint line /* in: line where locked */
+ #endif
+ )
+{
+ ut_ad(mutex_validate(mutex));
+
+ /* Note that we do not peek at the value of lock_word before trying
+ the atomic test_and_set; we could peek, and possibly save time. */
+
+ if (!mutex_test_and_set(mutex)) {
+
+ #ifdef UNIV_SYNC_DEBUG
+ mutex_set_debug_info(mutex, file_name, line);
+ #endif
+
+ return; /* Succeeded! */
+ }
+
+ mutex_spin_wait(mutex
+ #ifdef UNIV_SYNC_DEBUG
+ ,file_name,
+ line
+ #endif
+ );
+}
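+
+/* Editorial note, not part of the original file: a compact summary of the
+acquire/release protocol implemented by the inlined functions above; the names
+are only those declared in this file, and mutex_enter() is presumably a macro
+that passes __FILE__ and __LINE__ to mutex_enter_func() in debug builds.
+
+ acquire: if mutex_test_and_set(m) returns 0, the lock was free and is now
+ owned; otherwise mutex_spin_wait(m) spins up to SYNC_SPIN_ROUNDS
+ rounds and then suspends the thread in the primary wait array.
+ release: mutex_reset_lock_word(m); if mutex_get_waiters(m) != 0, then
+ mutex_signal_object(m) wakes the threads waiting for this mutex.
+*/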
diff --git a/innobase/include/sync0types.h b/innobase/include/sync0types.h
new file mode 100644
index 00000000000..2c31f80cca3
--- /dev/null
+++ b/innobase/include/sync0types.h
@@ -0,0 +1,15 @@
+/******************************************************
+Global types for sync
+
+(c) 1995 Innobase Oy
+
+Created 9/5/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef sync0types_h
+#define sync0types_h
+
+typedef struct mutex_struct mutex_t;
+
+
+#endif
diff --git a/innobase/include/thr0loc.h b/innobase/include/thr0loc.h
new file mode 100644
index 00000000000..32e2dc3ae93
--- /dev/null
+++ b/innobase/include/thr0loc.h
@@ -0,0 +1,67 @@
+/******************************************************
+The thread local storage
+
+(c) 1995 Innobase Oy
+
+Created 10/5/1995 Heikki Tuuri
+*******************************************************/
+
+/* This module implements storage private to each thread,
+a capability useful in some situations like storing the
+OS handle to the current thread, or its priority. */
+
+#ifndef thr0loc_h
+#define thr0loc_h
+
+#include "univ.i"
+#include "os0thread.h"
+
+/********************************************************************
+Initializes the thread local storage module. */
+
+void
+thr_local_init(void);
+/*================*/
+/***********************************************************************
+Creates a local storage struct for the calling new thread. */
+
+void
+thr_local_create(void);
+/*==================*/
+/***********************************************************************
+Frees the local storage struct for the specified thread. */
+
+void
+thr_local_free(
+/*===========*/
+ os_thread_id_t id); /* in: thread id */
+/***********************************************************************
+Gets the slot number in the thread table of a thread. */
+
+ulint
+thr_local_get_slot_no(
+/*==================*/
+ /* out: slot number */
+ os_thread_id_t id); /* in: thread id of the thread */
+/***********************************************************************
+Sets in the local storage the slot number in the thread table of a thread. */
+
+void
+thr_local_set_slot_no(
+/*==================*/
+ os_thread_id_t id, /* in: thread id of the thread */
+ ulint slot_no);/* in: slot number */
+/***********************************************************************
+Returns pointer to the 'in_ibuf' field within the current thread local
+storage. */
+
+ibool*
+thr_local_get_in_ibuf_field(void);
+/*=============================*/
+ /* out: pointer to the in_ibuf field */
+
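+/* Editorial illustration, not part of the original header: a minimal usage
+sketch of the API above, assuming os_thread_get_curr_id() from os0thread.h
+returns the id of the calling thread.
+
+ os_thread_id_t id = os_thread_get_curr_id();
+
+ thr_local_create(); (once, when the thread starts)
+ thr_local_set_slot_no(id, slot_no);
+ ... = thr_local_get_slot_no(id);
+ *thr_local_get_in_ibuf_field() = FALSE;
+ thr_local_free(id); (when the thread exits)
+*/
+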
+#ifndef UNIV_NONINL
+#include "thr0loc.ic"
+#endif
+
+#endif
diff --git a/innobase/include/thr0loc.ic b/innobase/include/thr0loc.ic
new file mode 100644
index 00000000000..b8b8136180c
--- /dev/null
+++ b/innobase/include/thr0loc.ic
@@ -0,0 +1,7 @@
+/******************************************************
+Thread local storage
+
+(c) 1995 Innobase Oy
+
+Created 10/4/1995 Heikki Tuuri
+*******************************************************/
diff --git a/innobase/include/trx0purge.h b/innobase/include/trx0purge.h
new file mode 100644
index 00000000000..8870ebc936c
--- /dev/null
+++ b/innobase/include/trx0purge.h
@@ -0,0 +1,166 @@
+/******************************************************
+Purge old versions
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef trx0purge_h
+#define trx0purge_h
+
+#include "univ.i"
+#include "trx0types.h"
+#include "mtr0mtr.h"
+#include "trx0sys.h"
+#include "que0types.h"
+#include "page0page.h"
+#include "usr0sess.h"
+#include "fil0fil.h"
+
+/* The global data structure coordinating a purge */
+extern trx_purge_t* purge_sys;
+
+/* A dummy undo record used as a return value when we have a whole undo log
+which needs no purge */
+extern trx_undo_rec_t trx_purge_dummy_rec;
+
+/************************************************************************
+Calculates the file address of an undo log header when we have the file
+address of its history list node. */
+UNIV_INLINE
+fil_addr_t
+trx_purge_get_log_from_hist(
+/*========================*/
+ /* out: file address of the log */
+ fil_addr_t node_addr); /* in: file address of the history
+ list node of the log */
+/*********************************************************************
+Checks if trx_id is >= purge_view: then it is guaranteed that its update
+undo log still exists in the system. */
+
+ibool
+trx_purge_update_undo_must_exist(
+/*=============================*/
+ /* out: TRUE if is sure that it is preserved, also
+ if the function returns FALSE, it is possible that
+ the undo log still exists in the system */
+ dulint trx_id);/* in: transaction id */
+/************************************************************************
+Creates the global purge system control structure and initializes the history
+mutex. */
+
+void
+trx_purge_sys_create(void);
+/*======================*/
+/************************************************************************
+Adds the update undo log as the first log in the history list. Removes the
+update undo log segment from the rseg slot if it is too big for reuse. */
+
+void
+trx_purge_add_update_undo_to_history(
+/*=================================*/
+ trx_t* trx, /* in: transaction */
+ page_t* undo_page, /* in: update undo log header page,
+ x-latched */
+ mtr_t* mtr); /* in: mtr */
+/************************************************************************
+Fetches the next undo log record from the history list to purge. It must be
+released with the corresponding release function. */
+
+trx_undo_rec_t*
+trx_purge_fetch_next_rec(
+/*=====================*/
+ /* out: copy of an undo log record, or
+ pointer to the dummy undo log record
+ &trx_purge_dummy_rec if the whole undo log
+ can be skipped in purge; NULL if none left */
+ dulint* roll_ptr,/* out: roll pointer to undo record */
+ trx_undo_inf_t** cell, /* out: storage cell for the record in the
+ purge array */
+ mem_heap_t* heap); /* in: memory heap where copied */
+/***********************************************************************
+Releases a reserved purge undo record. */
+
+void
+trx_purge_rec_release(
+/*==================*/
+ trx_undo_inf_t* cell); /* in: storage cell */
+/***********************************************************************
+This function runs a purge batch. */
+
+ulint
+trx_purge(void);
+/*===========*/
+ /* out: number of undo log pages handled in
+ the batch */
+
+/* The control structure used in the purge operation */
+struct trx_purge_struct{
+ ulint state; /* Purge system state */
+ sess_t* sess; /* System session running the purge
+ query */
+ trx_t* trx; /* System transaction running the purge
+ query: this trx is not in the trx list
+ of the trx system and it never ends */
+ que_t* query; /* The query graph which will do the
+ parallelized purge operation */
+ rw_lock_t purge_is_running;/* The purge operation sets an x-latch
+ here while it is accessing a table:
+ this prevents the table from being
+ dropped */
+ rw_lock_t latch; /* The latch protecting the purge view.
+ A purge operation must acquire an
+ x-latch here for the instant at which
+ it changes the purge view: an undo
+ log operation can prevent this by
+ obtaining an s-latch here. */
+ read_view_t* view; /* The purge will not remove undo logs
+ which are >= this view (purge view) */
+ mutex_t mutex; /* Mutex protecting the fields below */
+ ulint n_pages_handled;/* Approximate number of undo log
+ pages processed in purge */
+ ulint handle_limit; /* Target of how many pages to get
+ processed in the current purge */
+ /*------------------------------*/
+ /* The following two fields form the 'purge pointer' which advances
+ during a purge, and which is used in history list truncation */
+
+ dulint purge_trx_no; /* Purge has advanced past all
+ transactions whose number is less
+ than this */
+ dulint purge_undo_no; /* Purge has advanced past all records
+ whose undo number is less than this */
+ /*-----------------------------*/
+ ibool next_stored; /* TRUE if the info of the next record
+ to purge is stored below: if yes, then
+ the transaction number and the undo
+ number of the record are stored in
+ purge_trx_no and purge_undo_no above */
+ trx_rseg_t* rseg; /* Rollback segment for the next undo
+ record to purge */
+ ulint page_no; /* Page number for the next undo
+ record to purge, page number of the
+ log header, if dummy record */
+ ulint offset; /* Page offset for the next undo
+ record to purge, 0 if the dummy
+ record */
+ ulint hdr_page_no; /* Header page of the undo log where
+ the next record to purge belongs */
+ ulint hdr_offset; /* Header byte offset on the page */
+ /*-----------------------------*/
+ trx_undo_arr_t* arr; /* Array of transaction numbers and
+ undo numbers of the undo records
+ currently under processing in purge */
+ mem_heap_t* heap; /* Temporary storage used during a
+ purge: can be emptied after purge
+ completes */
+};
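+
+/* Editorial illustration, not part of the original header: a minimal sketch,
+under stated assumptions, of how the fetch/release pair declared above is
+driven. The real work is done by the query graph in purge_sys->query; this
+only shows the record-level protocol. mem_heap_create()/mem_heap_free() are
+assumed to come from mem0mem.h.
+
+ trx_undo_rec_t* rec;
+ trx_undo_inf_t* cell;
+ dulint roll_ptr;
+ mem_heap_t* heap = mem_heap_create(256);
+
+ for (;;) {
+ rec = trx_purge_fetch_next_rec(&roll_ptr, &cell, heap);
+
+ if (rec == NULL) {
+ break; (nothing left to purge)
+ }
+
+ if (rec != &trx_purge_dummy_rec) {
+ (remove the old version that rec and roll_ptr identify)
+ }
+
+ trx_purge_rec_release(cell);
+ }
+
+ mem_heap_free(heap);
+*/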
+
+#define TRX_PURGE_ON 1 /* purge operation is running */
+#define TRX_STOP_PURGE 2 /* purge operation is stopped, or
+ it should be stopped */
+#ifndef UNIV_NONINL
+#include "trx0purge.ic"
+#endif
+
+#endif
diff --git a/innobase/include/trx0purge.ic b/innobase/include/trx0purge.ic
new file mode 100644
index 00000000000..451e8ca31d0
--- /dev/null
+++ b/innobase/include/trx0purge.ic
@@ -0,0 +1,26 @@
+/******************************************************
+Purge old versions
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "trx0undo.h"
+
+/************************************************************************
+Calculates the file address of an undo log header when we have the file
+address of its history list node. */
+UNIV_INLINE
+fil_addr_t
+trx_purge_get_log_from_hist(
+/*========================*/
+ /* out: file address of the log */
+ fil_addr_t node_addr) /* in: file address of the history
+ list node of the log */
+{
+ node_addr.boffset -= TRX_UNDO_HISTORY_NODE;
+
+ return(node_addr);
+}
+
diff --git a/innobase/include/trx0rec.h b/innobase/include/trx0rec.h
new file mode 100644
index 00000000000..ea9e9f3fce5
--- /dev/null
+++ b/innobase/include/trx0rec.h
@@ -0,0 +1,284 @@
+/******************************************************
+Transaction undo log record
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef trx0rec_h
+#define trx0rec_h
+
+#include "univ.i"
+#include "trx0types.h"
+#include "row0types.h"
+#include "mtr0mtr.h"
+#include "trx0sys.h"
+#include "dict0types.h"
+#include "que0types.h"
+#include "data0data.h"
+#include "rem0types.h"
+
+/***************************************************************************
+Copies the undo record to the heap. */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_rec_copy(
+/*==============*/
+ /* out, own: copy of undo log record */
+ trx_undo_rec_t* undo_rec, /* in: undo log record */
+ mem_heap_t* heap); /* in: heap where copied */
+/**************************************************************************
+Reads the undo log record type. */
+UNIV_INLINE
+ulint
+trx_undo_rec_get_type(
+/*==================*/
+ /* out: record type */
+ trx_undo_rec_t* undo_rec); /* in: undo log record */
+/**************************************************************************
+Reads from an undo log record the record compiler info. */
+UNIV_INLINE
+ulint
+trx_undo_rec_get_cmpl_info(
+/*=======================*/
+ /* out: compiler info */
+ trx_undo_rec_t* undo_rec); /* in: undo log record */
+/**************************************************************************
+Reads the undo log record number. */
+UNIV_INLINE
+dulint
+trx_undo_rec_get_undo_no(
+/*=====================*/
+ /* out: undo no */
+ trx_undo_rec_t* undo_rec); /* in: undo log record */
+/**************************************************************************
+Reads from an undo log record the general parameters. */
+
+byte*
+trx_undo_rec_get_pars(
+/*==================*/
+ /* out: remaining part of undo log
+ record after reading these values */
+ trx_undo_rec_t* undo_rec, /* in: undo log record */
+ ulint* type, /* out: undo record type:
+ TRX_UNDO_INSERT_REC, ... */
+ ulint* cmpl_info, /* out: compiler info, relevant only
+ for update type records */
+ dulint* undo_no, /* out: undo log record number */
+ dulint* table_id); /* out: table id */
+/***********************************************************************
+Builds a row reference from an undo log record. */
+
+byte*
+trx_undo_rec_get_row_ref(
+/*=====================*/
+ /* out: pointer to remaining part of undo
+ record */
+ byte* ptr, /* in: remaining part of a copy of an undo log
+ record, at the start of the row reference;
+ NOTE that this copy of the undo log record must
+ be preserved as long as the row reference is
+ used, as we do NOT copy the data in the
+ record! */
+ dict_index_t* index, /* in: clustered index */
+ dtuple_t** ref, /* out, own: row reference */
+ mem_heap_t* heap); /* in: memory heap from which the memory
+ needed is allocated */
+/***********************************************************************
+Skips a row reference from an undo log record. */
+
+byte*
+trx_undo_rec_skip_row_ref(
+/*======================*/
+ /* out: pointer to remaining part of undo
+ record */
+ byte* ptr, /* in: remaining part in update undo log
+ record, at the start of the row reference */
+ dict_index_t* index); /* in: clustered index */
+/**************************************************************************
+Reads from an undo log update record the system field values of the old
+version. */
+
+byte*
+trx_undo_update_rec_get_sys_cols(
+/*=============================*/
+ /* out: remaining part of undo log
+ record after reading these values */
+ byte* ptr, /* in: remaining part of undo log
+ record after reading general
+ parameters */
+ dulint* trx_id, /* out: trx id */
+ dulint* roll_ptr, /* out: roll ptr */
+ ulint* info_bits); /* out: info bits state */
+/***********************************************************************
+Builds an update vector based on a remaining part of an undo log record. */
+
+byte*
+trx_undo_update_rec_get_update(
+/*===========================*/
+ /* out: remaining part of the record */
+ byte* ptr, /* in: remaining part in update undo log
+ record, after reading the row reference
+ NOTE that this copy of the undo log record must
+ be preserved as long as the update vector is
+ used, as we do NOT copy the data in the
+ record! */
+ dict_index_t* index, /* in: clustered index */
+ ulint type, /* in: TRX_UNDO_UPD_EXIST_REC,
+ TRX_UNDO_UPD_DEL_REC, or
+ TRX_UNDO_DEL_MARK_REC; in the last case,
+ only trx id and roll ptr fields are added to
+ the update vector */
+ dulint trx_id, /* in: transaction id from this undo record */
+ dulint roll_ptr,/* in: roll pointer from this undo record */
+ ulint info_bits,/* in: info bits from this undo record */
+ mem_heap_t* heap, /* in: memory heap from which the memory
+ needed is allocated */
+ upd_t** upd); /* out, own: update vector */
+/***********************************************************************
+Builds a partial row from an update undo log record. It contains the
+columns which occur as ordering fields in any index of the table. */
+
+byte*
+trx_undo_rec_get_partial_row(
+/*=========================*/
+ /* out: pointer to remaining part of undo
+ record */
+ byte* ptr, /* in: remaining part in update undo log
+ record of a suitable type, at the start of
+ the stored index columns;
+ NOTE that this copy of the undo log record must
+ be preserved as long as the partial row is
+ used, as we do NOT copy the data in the
+ record! */
+ dict_index_t* index, /* in: clustered index */
+ dtuple_t** row, /* out, own: partial row */
+ mem_heap_t* heap); /* in: memory heap from which the memory
+ needed is allocated */
+/***************************************************************************
+Writes information to an undo log about an insert, update, or a delete marking
+of a clustered index record. This information is used in a rollback of the
+transaction and in consistent reads that must look to the history of this
+transaction. */
+
+ulint
+trx_undo_report_row_operation(
+/*==========================*/
+ /* out: DB_SUCCESS or error code */
+ ulint flags, /* in: if BTR_NO_UNDO_LOG_FLAG bit is
+ set, does nothing */
+ ulint op_type, /* in: TRX_UNDO_INSERT_OP or
+ TRX_UNDO_MODIFY_OP */
+ que_thr_t* thr, /* in: query thread */
+ dict_index_t* index, /* in: clustered index */
+ dtuple_t* clust_entry, /* in: in the case of an insert,
+ index entry to insert into the
+ clustered index, otherwise NULL */
+ upd_t* update, /* in: in the case of an update,
+ the update vector, otherwise NULL */
+ ulint cmpl_info, /* in: compiler info on secondary
+ index updates */
+ rec_t* rec, /* in: case of an update or delete
+ marking, the record in the clustered
+ index, otherwise NULL */
+ dulint* roll_ptr); /* out: rollback pointer to the
+ inserted undo log record,
+ ut_dulint_zero if BTR_NO_UNDO_LOG
+ flag was specified */
+/**********************************************************************
+Copies an undo record to heap. This function can be called if we know that
+the undo log record exists. */
+
+trx_undo_rec_t*
+trx_undo_get_undo_rec_low(
+/*======================*/
+ /* out, own: copy of the record */
+ dulint roll_ptr, /* in: roll pointer to record */
+ mem_heap_t* heap); /* in: memory heap where copied */
+/**********************************************************************
+Copies an undo record to heap. */
+
+ulint
+trx_undo_get_undo_rec(
+/*==================*/
+ /* out: DB_SUCCESS, or
+ DB_MISSING_HISTORY if the undo log
+ has been truncated and we cannot
+ fetch the old version; NOTE: the
+ caller must have latches on the
+ clustered index page and purge_view */
+ dulint roll_ptr, /* in: roll pointer to record */
+ dulint trx_id, /* in: id of the trx that generated
+ the roll pointer: it points to an
+ undo log of this transaction */
+ trx_undo_rec_t** undo_rec, /* out, own: copy of the record */
+ mem_heap_t* heap); /* in: memory heap where copied */
+/***********************************************************************
+Builds a previous version of a clustered index record. This function checks
+that the caller has a latch on the index page of the clustered index record
+and an s-latch on the purge_view. This guarantees that the stack of versions
+is locked. */
+
+ulint
+trx_undo_prev_version_build(
+/*========================*/
+ /* out: DB_SUCCESS, or DB_MISSING_HISTORY if
+ the previous version is not >= purge_view,
+ which means that it may have been removed */
+ rec_t* index_rec,/* in: clustered index record in the
+ index tree */
+ mtr_t* index_mtr,/* in: mtr which contains the latch to
+ index_rec page and purge_view */
+ rec_t* rec, /* in: version of a clustered index record */
+ dict_index_t* index, /* in: clustered index */
+ mem_heap_t* heap, /* in: memory heap from which the memory
+ needed is allocated */
+ rec_t** old_vers);/* out, own: previous version, or NULL if
+ rec is the first inserted version, or if
+ history data has been deleted */
+/***************************************************************
+Parses a redo log record of adding an undo log record. */
+
+byte*
+trx_undo_parse_add_undo_rec(
+/*========================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page); /* in: page or NULL */
+/***************************************************************
+Parses a redo log record of erasing an undo page end. */
+
+byte*
+trx_undo_parse_erase_page_end(
+/*==========================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
+
+/* Types of an undo log record: these have to be smaller than 16, as the
+compilation info multiplied by 16 is ORed to this value in an undo log
+record */
+#define TRX_UNDO_INSERT_REC 11 /* fresh insert into clustered index */
+#define TRX_UNDO_UPD_EXIST_REC 12 /* update of a non-delete-marked
+ record */
+#define TRX_UNDO_UPD_DEL_REC 13 /* update of a delete marked record to
+ a not delete marked record; also the
+ fields of the record can change */
+#define TRX_UNDO_DEL_MARK_REC 14 /* delete marking of a record; fields
+ do not change */
+#define TRX_UNDO_CMPL_INFO_MULT 16 /* compilation info is multiplied by
+ this and ORed to the type above */
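+
+/* Editorial note, not part of the original header: the type byte of an undo
+log record (read at offset 2 by the inline functions in trx0rec.ic) packs both
+fields as cmpl_info * TRX_UNDO_CMPL_INFO_MULT + type. For example, cmpl_info
+== 2 with type == TRX_UNDO_UPD_EXIST_REC (12) gives 2 * 16 + 12 = 44, and the
+readers recover the fields as 44 & (TRX_UNDO_CMPL_INFO_MULT - 1) == 12 and
+44 / TRX_UNDO_CMPL_INFO_MULT == 2. */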
+
+/* Operation type flags used in trx_undo_report_row_operation */
+#define TRX_UNDO_INSERT_OP 1
+#define TRX_UNDO_MODIFY_OP 2
+
+#ifndef UNIV_NONINL
+#include "trx0rec.ic"
+#endif
+
+#endif
diff --git a/innobase/include/trx0rec.ic b/innobase/include/trx0rec.ic
new file mode 100644
index 00000000000..f813a52ff9c
--- /dev/null
+++ b/innobase/include/trx0rec.ic
@@ -0,0 +1,69 @@
+/******************************************************
+Transaction undo log record
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+/**************************************************************************
+Reads from an undo log record the record type. */
+UNIV_INLINE
+ulint
+trx_undo_rec_get_type(
+/*==================*/
+ /* out: record type */
+ trx_undo_rec_t* undo_rec) /* in: undo log record */
+{
+ return(mach_read_from_1(undo_rec + 2) & (TRX_UNDO_CMPL_INFO_MULT - 1));
+}
+
+/**************************************************************************
+Reads from an undo log record the record compiler info. */
+UNIV_INLINE
+ulint
+trx_undo_rec_get_cmpl_info(
+/*=======================*/
+ /* out: compiler info */
+ trx_undo_rec_t* undo_rec) /* in: undo log record */
+{
+ return(mach_read_from_1(undo_rec + 2) / TRX_UNDO_CMPL_INFO_MULT);
+}
+
+/**************************************************************************
+Reads the undo log record number. */
+UNIV_INLINE
+dulint
+trx_undo_rec_get_undo_no(
+/*=====================*/
+ /* out: undo no */
+ trx_undo_rec_t* undo_rec) /* in: undo log record */
+{
+ byte* ptr;
+
+ ptr = undo_rec + 3;
+
+ return(mach_dulint_read_much_compressed(ptr));
+}
+
+/***************************************************************************
+Copies the undo record to the heap. */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_rec_copy(
+/*==============*/
+ /* out, own: copy of undo log record */
+ trx_undo_rec_t* undo_rec, /* in: undo log record */
+ mem_heap_t* heap) /* in: heap where copied */
+{
+ ulint len;
+ trx_undo_rec_t* rec_copy;
+
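+ /* Note (editor): as the computation below implies, the two bytes at
+ the start of an undo record hold the page offset of the record's end
+ (the start of the next record); the length is that offset minus the
+ record's own offset within its buffer frame. */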
+ len = mach_read_from_2(undo_rec) + buf_frame_align(undo_rec)
+ - undo_rec;
+ rec_copy = mem_heap_alloc(heap, len);
+
+ ut_memcpy(rec_copy, undo_rec, len);
+
+ return(rec_copy);
+}
diff --git a/innobase/include/trx0roll.h b/innobase/include/trx0roll.h
new file mode 100644
index 00000000000..c456768e820
--- /dev/null
+++ b/innobase/include/trx0roll.h
@@ -0,0 +1,216 @@
+/******************************************************
+Transaction rollback
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef trx0roll_h
+#define trx0roll_h
+
+#include "univ.i"
+#include "trx0trx.h"
+#include "trx0types.h"
+#include "mtr0mtr.h"
+#include "trx0sys.h"
+
+/***********************************************************************
+Returns a transaction savepoint taken at this point in time. */
+
+trx_savept_t
+trx_savept_take(
+/*============*/
+ /* out: savepoint */
+ trx_t* trx); /* in: transaction */
+/***********************************************************************
+Creates an undo number array. */
+
+trx_undo_arr_t*
+trx_undo_arr_create(void);
+/*=====================*/
+/***********************************************************************
+Frees an undo number array. */
+
+void
+trx_undo_arr_free(
+/*==============*/
+ trx_undo_arr_t* arr); /* in: undo number array */
+/***********************************************************************
+Returns pointer to nth element in an undo number array. */
+UNIV_INLINE
+trx_undo_inf_t*
+trx_undo_arr_get_nth_info(
+/*======================*/
+ /* out: pointer to the nth element */
+ trx_undo_arr_t* arr, /* in: undo number array */
+ ulint n); /* in: position */
+/***************************************************************************
+Tries to truncate the undo logs. */
+
+void
+trx_roll_try_truncate(
+/*==================*/
+ trx_t* trx); /* in: transaction */
+/************************************************************************
+Pops the topmost record when the two undo logs of a transaction are seen
+as a single stack of records ordered by their undo numbers. Inserts the
+undo number of the popped undo record to the array of currently processed
+undo numbers in the transaction. When the query thread finishes processing
+of this undo record, it must be released with trx_undo_rec_release. */
+
+trx_undo_rec_t*
+trx_roll_pop_top_rec_of_trx(
+/*========================*/
+ /* out: undo log record copied to heap, NULL
+ if none left, or if the undo number of the
+ top record would be less than the limit */
+ trx_t* trx, /* in: transaction */
+ dulint limit, /* in: least undo number we need */
+ dulint* roll_ptr,/* out: roll pointer to undo record */
+ mem_heap_t* heap); /* in: memory heap where copied */
+/************************************************************************
+Reserves an undo log record for a query thread to undo. This should be
+called if the query thread obtains the undo log record by some means other
+than the pop function above. */
+
+ibool
+trx_undo_rec_reserve(
+/*=================*/
+ /* out: TRUE if succeeded */
+ trx_t* trx, /* in: transaction */
+ dulint undo_no);/* in: undo number of the record */
+/***********************************************************************
+Releases a reserved undo record. */
+
+void
+trx_undo_rec_release(
+/*=================*/
+ trx_t* trx, /* in: transaction */
+ dulint undo_no);/* in: undo number */
+/*************************************************************************
+Starts a rollback operation. */
+
+void
+trx_rollback(
+/*=========*/
+ trx_t* trx, /* in: transaction */
+ trx_sig_t* sig, /* in: signal starting the rollback */
+ que_thr_t** next_thr);/* in/out: next query thread to run;
+ if the value which is passed in is
+ a pointer to a NULL pointer, then the
+ calling function can start running
+ a new query thread */
+/***********************************************************************
+Rolls back uncommitted transactions which have no user session. */
+
+void
+trx_rollback_all_without_sess(void);
+/*===============================*/
+/********************************************************************
+Finishes a transaction rollback. */
+
+void
+trx_finish_rollback_off_kernel(
+/*===========================*/
+ que_t* graph, /* in: undo graph which can now be freed */
+ trx_t* trx, /* in: transaction */
+ que_thr_t** next_thr);/* in/out: next query thread to run;
+ if the value which is passed in is
+ a pointer to a NULL pointer, then the
+ calling function can start running
+ a new query thread; if this parameter is
+ NULL, it is ignored */
+/********************************************************************
+Builds an undo 'query' graph for a transaction. The actual rollback is
+performed by executing this query graph like a query subprocedure call.
+The reply about the completion of the rollback will be sent by this
+graph. */
+
+que_t*
+trx_roll_graph_build(
+/*=================*/
+ /* out, own: the query graph */
+ trx_t* trx); /* in: trx handle */
+/*************************************************************************
+Creates a rollback command node struct. */
+
+roll_node_t*
+roll_node_create(
+/*=============*/
+ /* out, own: rollback node struct */
+ mem_heap_t* heap); /* in: mem heap where created */
+/***************************************************************
+Performs an execution step for a rollback command node in a query graph. */
+
+que_thr_t*
+trx_rollback_step(
+/*==============*/
+ /* out: query thread to run next, or NULL */
+ que_thr_t* thr); /* in: query thread */
+/***********************************************************************
+Rolls back a transaction used in MySQL. */
+
+int
+trx_rollback_for_mysql(
+/*===================*/
+ /* out: error code or DB_SUCCESS */
+ trx_t* trx); /* in: transaction handle */
+/***********************************************************************
+Rolls back the latest SQL statement for MySQL. */
+
+int
+trx_rollback_last_sql_stat_for_mysql(
+/*=================================*/
+ /* out: error code or DB_SUCCESS */
+ trx_t* trx); /* in: transaction handle */
+/***********************************************************************
+Rolls back a transaction used in MySQL, either entirely or partially to a
+savepoint. */
+
+int
+trx_general_rollback_for_mysql(
+/*===========================*/
+ /* out: error code or DB_SUCCESS */
+ trx_t* trx, /* in: transaction handle */
+ ibool partial,/* in: TRUE if partial rollback requested */
+ trx_savept_t* savept);/* in: pointer to savepoint undo number, if
+ partial rollback requested */
+
+extern sess_t* trx_dummy_sess;
+
+/* A cell in the array used during a rollback and a purge */
+struct trx_undo_inf_struct{
+ dulint trx_no; /* transaction number: not defined during
+ a rollback */
+ dulint undo_no; /* undo number of an undo record */
+ ibool in_use; /* TRUE if the cell is in use */
+};
+
+/* During a rollback and a purge, undo numbers of undo records currently being
+processed are stored in this array */
+
+struct trx_undo_arr_struct{
+ ulint n_cells; /* number of cells in the array */
+ ulint n_used; /* number of cells currently in use */
+ trx_undo_inf_t* infos; /* the array of undo infos */
+ mem_heap_t* heap; /* memory heap from which allocated */
+};
+
+/* Rollback command node in a query graph */
+struct roll_node_struct{
+ que_common_t common; /* node type: QUE_NODE_ROLLBACK */
+ ulint state; /* node execution state */
+ ibool partial;/* TRUE if we want a partial rollback */
+ trx_savept_t savept; /* savepoint to which to roll back, in the
+ case of a partial rollback */
+};
+
+/* Rollback node states */
+#define ROLL_NODE_SEND 1
+#define ROLL_NODE_WAIT 2
+
+#ifndef UNIV_NONINL
+#include "trx0roll.ic"
+#endif
+
+#endif
diff --git a/innobase/include/trx0roll.ic b/innobase/include/trx0roll.ic
new file mode 100644
index 00000000000..dfde83ac478
--- /dev/null
+++ b/innobase/include/trx0roll.ic
@@ -0,0 +1,23 @@
+/******************************************************
+Transaction rollback
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+/***********************************************************************
+Returns pointer to nth element in an undo number array. */
+UNIV_INLINE
+trx_undo_inf_t*
+trx_undo_arr_get_nth_info(
+/*======================*/
+ /* out: pointer to the nth element */
+ trx_undo_arr_t* arr, /* in: undo number array */
+ ulint n) /* in: position */
+{
+ ut_ad(arr);
+ ut_ad(n < arr->n_cells);
+
+ return(arr->infos + n);
+}
diff --git a/innobase/include/trx0rseg.h b/innobase/include/trx0rseg.h
new file mode 100644
index 00000000000..fd64612ab3f
--- /dev/null
+++ b/innobase/include/trx0rseg.h
@@ -0,0 +1,193 @@
+/******************************************************
+Rollback segment
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef trx0rseg_h
+#define trx0rseg_h
+
+#include "univ.i"
+#include "trx0types.h"
+#include "trx0sys.h"
+
+/**********************************************************************
+Gets a rollback segment header. */
+UNIV_INLINE
+trx_rsegf_t*
+trx_rsegf_get(
+/*==========*/
+ /* out: rollback segment header, page
+ x-latched */
+ ulint space, /* in: space where placed */
+ ulint page_no, /* in: page number of the header */
+ mtr_t* mtr); /* in: mtr */
+/**********************************************************************
+Gets a newly created rollback segment header. */
+UNIV_INLINE
+trx_rsegf_t*
+trx_rsegf_get_new(
+/*==============*/
+ /* out: rollback segment header, page
+ x-latched */
+ ulint space, /* in: space where placed */
+ ulint page_no, /* in: page number of the header */
+ mtr_t* mtr); /* in: mtr */
+/*******************************************************************
+Gets the file page number of the nth undo log slot. */
+UNIV_INLINE
+ulint
+trx_rsegf_get_nth_undo(
+/*===================*/
+ /* out: page number of the undo log segment */
+ trx_rsegf_t* rsegf, /* in: rollback segment header */
+ ulint n, /* in: index of slot */
+ mtr_t* mtr); /* in: mtr */
+/*******************************************************************
+Sets the file page number of the nth undo log slot. */
+UNIV_INLINE
+void
+trx_rsegf_set_nth_undo(
+/*===================*/
+ trx_rsegf_t* rsegf, /* in: rollback segment header */
+ ulint n, /* in: index of slot */
+ ulint page_no,/* in: page number of the undo log segment */
+ mtr_t* mtr); /* in: mtr */
+/********************************************************************
+Looks for a free slot for an undo log segment. */
+UNIV_INLINE
+ulint
+trx_rsegf_undo_find_free(
+/*=====================*/
+ /* out: slot index or ULINT_UNDEFINED if not
+ found */
+ trx_rsegf_t* rsegf, /* in: rollback segment header */
+ mtr_t* mtr); /* in: mtr */
+/**********************************************************************
+Looks for a rollback segment, based on the rollback segment id. */
+
+trx_rseg_t*
+trx_rseg_get_on_id(
+/*===============*/
+ /* out: rollback segment */
+ ulint id); /* in: rollback segment id */
+/********************************************************************
+Creates a rollback segment header. This function is called only when
+a new rollback segment is created in the database. */
+
+ulint
+trx_rseg_header_create(
+/*===================*/
+ /* out: page number of the created segment,
+ FIL_NULL if fail */
+ ulint space, /* in: space id */
+ ulint max_size, /* in: max size in pages */
+ ulint* slot_no, /* out: rseg id == slot number in trx sys */
+ mtr_t* mtr); /* in: mtr */
+/*************************************************************************
+Creates the memory copies for rollback segments and initializes the
+rseg list and array in trx_sys at a database startup. */
+
+void
+trx_rseg_list_and_array_init(
+/*=========================*/
+ trx_sysf_t* sys_header, /* in: trx system header */
+ mtr_t* mtr); /* in: mtr */
+/********************************************************************
+Creates a new rollback segment in the database. */
+
+trx_rseg_t*
+trx_rseg_create(
+/*============*/
+ /* out: the created segment object, NULL if
+ fail */
+ ulint space, /* in: space id */
+ ulint max_size, /* in: max size in pages */
+ ulint* id, /* out: rseg id */
+ mtr_t* mtr); /* in: mtr */
+
+
+/* Number of undo log slots in a rollback segment file copy */
+#define TRX_RSEG_N_SLOTS 1024
+
+/* Maximum number of transactions supported by a single rollback segment */
+#define TRX_RSEG_MAX_N_TRXS (TRX_RSEG_N_SLOTS / 2)
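+
+/* Editorial note: with TRX_RSEG_N_SLOTS == 1024 this evaluates to 512; the
+division by two presumably reflects that one transaction may need two slots,
+one for its insert undo log and one for its update undo log (see the lists in
+trx_rseg_struct below). */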
+
+/* The rollback segment memory object */
+struct trx_rseg_struct{
+ /*--------------------------------------------------------*/
+ ulint id; /* rollback segment id == the index of
+ its slot in the trx system file copy */
+ mutex_t mutex; /* mutex protecting the fields in this
+ struct except id; NOTE that the latching
+ order must always be kernel mutex ->
+ rseg mutex */
+ ulint space; /* space where the rollback segment
+ header is placed */
+ ulint page_no;/* page number of the rollback segment
+ header */
+ ulint max_size;/* maximum allowed size in pages */
+ ulint curr_size;/* current size in pages */
+ /*--------------------------------------------------------*/
+ /* Fields for update undo logs */
+ UT_LIST_BASE_NODE_T(trx_undo_t) update_undo_list;
+ /* List of update undo logs */
+ UT_LIST_BASE_NODE_T(trx_undo_t) update_undo_cached;
+ /* List of update undo log segments
+ cached for fast reuse */
+ /*--------------------------------------------------------*/
+ /* Fields for insert undo logs */
+ UT_LIST_BASE_NODE_T(trx_undo_t) insert_undo_list;
+ /* List of insert undo logs */
+ UT_LIST_BASE_NODE_T(trx_undo_t) insert_undo_cached;
+ /* List of insert undo log segments
+ cached for fast reuse */
+ /*--------------------------------------------------------*/
+ ulint last_page_no; /* Page number of the last not yet
+ purged log header in the history list;
+ FIL_NULL if all list purged */
+ ulint last_offset; /* Byte offset of the last not yet
+ purged log header */
+ dulint last_trx_no; /* Transaction number of the last not
+ yet purged log */
+ ibool last_del_marks; /* TRUE if the last not yet purged log
+ needs purging */
+ /*--------------------------------------------------------*/
+ UT_LIST_NODE_T(trx_rseg_t) rseg_list;
+ /* the list of the rollback segment
+ memory objects */
+};
+
+/* Undo log segment slot in a rollback segment header */
+/*-------------------------------------------------------------*/
+#define TRX_RSEG_SLOT_PAGE_NO 0 /* Page number of the header page of
+ an undo log segment */
+/*-------------------------------------------------------------*/
+/* Slot size */
+#define TRX_RSEG_SLOT_SIZE 4
+
+/* The offset of the rollback segment header on its page */
+#define TRX_RSEG FSEG_PAGE_DATA
+
+/* Transaction rollback segment header */
+/*-------------------------------------------------------------*/
+#define TRX_RSEG_MAX_SIZE 0 /* Maximum allowed size for rollback
+ segment in pages */
+#define TRX_RSEG_HISTORY_SIZE 4 /* Number of file pages occupied
+ by the logs in the history list */
+#define TRX_RSEG_HISTORY 8 /* The update undo logs for committed
+ transactions */
+#define TRX_RSEG_FSEG_HEADER (8 + FLST_BASE_NODE_SIZE)
+ /* Header for the file segment where
+ this page is placed */
+#define TRX_RSEG_UNDO_SLOTS (8 + FLST_BASE_NODE_SIZE + FSEG_HEADER_SIZE)
+ /* Undo log segment slots */
+/*-------------------------------------------------------------*/
+
+#ifndef UNIV_NONINL
+#include "trx0rseg.ic"
+#endif
+
+#endif
diff --git a/innobase/include/trx0rseg.ic b/innobase/include/trx0rseg.ic
new file mode 100644
index 00000000000..aeb4466ff0f
--- /dev/null
+++ b/innobase/include/trx0rseg.ic
@@ -0,0 +1,112 @@
+/******************************************************
+Rollback segment
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "srv0srv.h"
+
+/**********************************************************************
+Gets a rollback segment header. */
+UNIV_INLINE
+trx_rsegf_t*
+trx_rsegf_get(
+/*==========*/
+ /* out: rollback segment header, page
+ x-latched */
+ ulint space, /* in: space where placed */
+ ulint page_no, /* in: page number of the header */
+ mtr_t* mtr) /* in: mtr */
+{
+ trx_rsegf_t* header;
+
+ header = TRX_RSEG + buf_page_get(space, page_no, RW_X_LATCH, mtr);
+
+ buf_page_dbg_add_level(header, SYNC_RSEG_HEADER);
+
+ return(header);
+}
+
+/**********************************************************************
+Gets a newly created rollback segment header. */
+UNIV_INLINE
+trx_rsegf_t*
+trx_rsegf_get_new(
+/*==============*/
+ /* out: rollback segment header, page
+ x-latched */
+ ulint space, /* in: space where placed */
+ ulint page_no, /* in: page number of the header */
+ mtr_t* mtr) /* in: mtr */
+{
+ trx_rsegf_t* header;
+
+ header = TRX_RSEG + buf_page_get(space, page_no, RW_X_LATCH, mtr);
+
+ buf_page_dbg_add_level(header, SYNC_RSEG_HEADER_NEW);
+
+ return(header);
+}
+
+/*******************************************************************
+Gets the file page number of the nth undo log slot. */
+UNIV_INLINE
+ulint
+trx_rsegf_get_nth_undo(
+/*===================*/
+ /* out: page number of the undo log segment */
+ trx_rsegf_t* rsegf, /* in: rollback segment header */
+ ulint n, /* in: index of slot */
+ mtr_t* mtr) /* in: mtr */
+{
+ ut_ad(n < TRX_RSEG_N_SLOTS);
+
+ return(mtr_read_ulint(rsegf + TRX_RSEG_UNDO_SLOTS +
+ n * TRX_RSEG_SLOT_SIZE, MLOG_4BYTES, mtr));
+}
+
+/*******************************************************************
+Sets the file page number of the nth undo log slot. */
+UNIV_INLINE
+void
+trx_rsegf_set_nth_undo(
+/*===================*/
+ trx_rsegf_t* rsegf, /* in: rollback segment header */
+ ulint n, /* in: index of slot */
+ ulint page_no,/* in: page number of the undo log segment */
+ mtr_t* mtr) /* in: mtr */
+{
+ ut_ad(n < TRX_RSEG_N_SLOTS);
+
+ mlog_write_ulint(rsegf + TRX_RSEG_UNDO_SLOTS + n * TRX_RSEG_SLOT_SIZE,
+ page_no, MLOG_4BYTES, mtr);
+}
+
+/********************************************************************
+Looks for a free slot for an undo log segment. */
+UNIV_INLINE
+ulint
+trx_rsegf_undo_find_free(
+/*=====================*/
+ /* out: slot index or ULINT_UNDEFINED if not
+ found */
+ trx_rsegf_t* rsegf, /* in: rollback segment header */
+ mtr_t* mtr) /* in: mtr */
+{
+ ulint i;
+ ulint page_no;
+
+ for (i = 0; i < TRX_RSEG_N_SLOTS; i++) {
+
+ page_no = trx_rsegf_get_nth_undo(rsegf, i, mtr);
+
+ if (page_no == FIL_NULL) {
+
+ return(i);
+ }
+ }
+
+ return(ULINT_UNDEFINED);
+}
diff --git a/innobase/include/trx0sys.h b/innobase/include/trx0sys.h
new file mode 100644
index 00000000000..d0506dd65b7
--- /dev/null
+++ b/innobase/include/trx0sys.h
@@ -0,0 +1,270 @@
+/******************************************************
+Transaction system
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef trx0sys_h
+#define trx0sys_h
+
+#include "univ.i"
+
+#include "trx0types.h"
+#include "mtr0mtr.h"
+#include "mtr0log.h"
+#include "ut0byte.h"
+#include "mem0mem.h"
+#include "sync0sync.h"
+#include "ut0lst.h"
+#include "buf0buf.h"
+#include "fil0fil.h"
+#include "fut0lst.h"
+#include "fsp0fsp.h"
+#include "read0types.h"
+
+/* The transaction system */
+extern trx_sys_t* trx_sys;
+
+/*******************************************************************
+Checks if a page address is the trx sys header page. */
+UNIV_INLINE
+ibool
+trx_sys_hdr_page(
+/*=============*/
+ /* out: TRUE if trx sys header page */
+ ulint space, /* in: space */
+ ulint page_no);/* in: page number */
+/*********************************************************************
+Creates and initializes the central memory structures for the transaction
+system. This is called when the database is started. */
+
+void
+trx_sys_init_at_db_start(void);
+/*==========================*/
+/*********************************************************************
+Creates and initializes the transaction system at the database creation. */
+
+void
+trx_sys_create(void);
+/*================*/
+/********************************************************************
+Looks for a free slot for a rollback segment in the trx system file copy. */
+
+ulint
+trx_sysf_rseg_find_free(
+/*====================*/
+ /* out: slot index or ULINT_UNDEFINED
+ if not found */
+ mtr_t* mtr); /* in: mtr */
+/*******************************************************************
+Gets the pointer in the nth slot of the rseg array. */
+UNIV_INLINE
+trx_rseg_t*
+trx_sys_get_nth_rseg(
+/*=================*/
+ /* out: pointer to rseg object, NULL if slot
+ not in use */
+ trx_sys_t* sys, /* in: trx system */
+ ulint n); /* in: index of slot */
+/*******************************************************************
+Sets the pointer in the nth slot of the rseg array. */
+UNIV_INLINE
+void
+trx_sys_set_nth_rseg(
+/*=================*/
+ trx_sys_t* sys, /* in: trx system */
+ ulint n, /* in: index of slot */
+ trx_rseg_t* rseg); /* in: pointer to rseg object, NULL if slot
+ not in use */
+/**************************************************************************
+Gets a pointer to the transaction system file copy and x-latches its page. */
+UNIV_INLINE
+trx_sysf_t*
+trx_sysf_get(
+/*=========*/
+		/* out: pointer to system file copy, page x-latched */
+ mtr_t* mtr); /* in: mtr */
+/*********************************************************************
+Gets the space of the nth rollback segment slot in the trx system
+file copy. */
+UNIV_INLINE
+ulint
+trx_sysf_rseg_get_space(
+/*====================*/
+ /* out: space id */
+ trx_sysf_t* sys_header, /* in: trx sys file copy */
+ ulint i, /* in: slot index == rseg id */
+ mtr_t* mtr); /* in: mtr */
+/*********************************************************************
+Gets the page number of the nth rollback segment slot in the trx system
+file copy. */
+UNIV_INLINE
+ulint
+trx_sysf_rseg_get_page_no(
+/*======================*/
+ /* out: page number, FIL_NULL
+ if slot unused */
+ trx_sysf_t* sys_header, /* in: trx sys file copy */
+ ulint i, /* in: slot index == rseg id */
+ mtr_t* mtr); /* in: mtr */
+/*********************************************************************
+Sets the space id of the nth rollback segment slot in the trx system
+file copy. */
+UNIV_INLINE
+void
+trx_sysf_rseg_set_space(
+/*====================*/
+ trx_sysf_t* sys_header, /* in: trx sys file copy */
+ ulint i, /* in: slot index == rseg id */
+ ulint space, /* in: space id */
+ mtr_t* mtr); /* in: mtr */
+/*********************************************************************
+Sets the page number of the nth rollback segment slot in the trx system
+file copy. */
+UNIV_INLINE
+void
+trx_sysf_rseg_set_page_no(
+/*======================*/
+ trx_sysf_t* sys_header, /* in: trx sys file copy */
+ ulint i, /* in: slot index == rseg id */
+ ulint page_no, /* in: page number, FIL_NULL if
+ the slot is reset to unused */
+ mtr_t* mtr); /* in: mtr */
+/*********************************************************************
+Allocates a new transaction id. */
+UNIV_INLINE
+dulint
+trx_sys_get_new_trx_id(void);
+/*========================*/
+ /* out: new, allocated trx id */
+/*********************************************************************
+Allocates a new transaction number. */
+UNIV_INLINE
+dulint
+trx_sys_get_new_trx_no(void);
+/*========================*/
+ /* out: new, allocated trx number */
+/*********************************************************************
+Writes a trx id to an index page. In case that the id size changes in
+some future version, this function should be used instead of
+mach_write_... */
+UNIV_INLINE
+void
+trx_write_trx_id(
+/*=============*/
+ byte* ptr, /* in: pointer to memory where written */
+ dulint id); /* in: id */
+/*********************************************************************
+Reads a trx id from an index page. In case that the id size changes in
+some future version, this function should be used instead of
+mach_read_... */
+UNIV_INLINE
+dulint
+trx_read_trx_id(
+/*============*/
+ /* out: id */
+ byte* ptr); /* in: pointer to memory from where to read */
+/********************************************************************
+Looks for the trx handle with the given id in trx_list. */
+UNIV_INLINE
+trx_t*
+trx_get_on_id(
+/*==========*/
+ /* out: the trx handle or NULL if not found */
+ dulint trx_id); /* in: trx id to search for */
+/********************************************************************
+Returns the minimum trx id in trx list. This is the smallest id for which
+the trx can possibly be active. (But, you must look at the trx->conc_state to
+find out if the minimum trx id transaction itself is active, or already
+committed.) */
+UNIV_INLINE
+dulint
+trx_list_get_min_trx_id(void);
+/*=========================*/
+ /* out: the minimum trx id, or trx_sys->max_trx_id
+ if the trx list is empty */
+/********************************************************************
+Checks if a transaction with the given id is active. */
+UNIV_INLINE
+ibool
+trx_is_active(
+/*==========*/
+ /* out: TRUE if active */
+ dulint trx_id);/* in: trx id of the transaction */
+/********************************************************************
+Checks that trx is in the trx list. */
+
+ibool
+trx_in_trx_list(
+/*============*/
+ /* out: TRUE if is in */
+ trx_t* in_trx);/* in: trx */
+
+/* The automatically created system rollback segment has this id */
+#define TRX_SYS_SYSTEM_RSEG_ID 0
+
+/* Max number of rollback segments: the number of segment specification slots
+in the transaction system array; rollback segment id must fit in one byte,
+therefore 256 */
+#define TRX_SYS_N_RSEGS 256
+
+/* Space id and page no where the trx system file copy resides */
+#define TRX_SYS_SPACE 0 /* the SYSTEM tablespace */
+#define TRX_SYS_PAGE_NO FSP_TRX_SYS_PAGE_NO
+
+/* The offset of the transaction system header on the page */
+#define TRX_SYS FSEG_PAGE_DATA
+
+/* Transaction system header; protected by trx_sys->mutex */
+/*-------------------------------------------------------------*/
+#define TRX_SYS_TRX_ID_STORE	0	/* The maximum trx id or trx number
+					modulo TRX_SYS_TRX_ID_WRITE_MARGIN
+					written to a file page by any
+					transaction; the assignment of
+					transaction ids continues from this
+					number rounded up to the next multiple
+					of .._MARGIN, plus .._MARGIN, when the
+					database is started */
+#define TRX_SYS_FSEG_HEADER 8 /* segment header for the tablespace
+ segment the trx system is created
+ into */
+#define TRX_SYS_RSEGS (8 + FSEG_HEADER_SIZE)
+ /* the start of the array of rollback
+ segment specification slots */
+/*-------------------------------------------------------------*/
+
+/* The transaction system central memory data structure; protected by the
+kernel mutex */
+struct trx_sys_struct{
+ dulint max_trx_id; /* The smallest number not yet
+ assigned as a transaction id or
+ transaction number */
+ UT_LIST_BASE_NODE_T(trx_t) trx_list;
+ /* List of active and committed in
+ memory transactions, sorted on trx id,
+ biggest first */
+ UT_LIST_BASE_NODE_T(trx_rseg_t) rseg_list;
+ /* List of rollback segment objects */
+ trx_rseg_t* latest_rseg; /* Latest rollback segment in the
+ round-robin assignment of rollback
+ segments to transactions */
+ trx_rseg_t* rseg_array[TRX_SYS_N_RSEGS];
+ /* Pointer array to rollback segments;
+ NULL if slot not in use */
+ UT_LIST_BASE_NODE_T(read_view_t) view_list;
+ /* List of read views sorted on trx no,
+ biggest first */
+};
+
+/* When a trx id which is zero modulo this number (which must be a power of
+two) is assigned, the field TRX_SYS_TRX_ID_STORE on the transaction system
+page is updated */
+#define TRX_SYS_TRX_ID_WRITE_MARGIN 256
+
+#ifndef UNIV_NONINL
+#include "trx0sys.ic"
+#endif
+
+#endif
diff --git a/innobase/include/trx0sys.ic b/innobase/include/trx0sys.ic
new file mode 100644
index 00000000000..786e7905933
--- /dev/null
+++ b/innobase/include/trx0sys.ic
@@ -0,0 +1,352 @@
+/******************************************************
+Transaction system
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "srv0srv.h"
+#include "trx0trx.h"
+#include "data0type.h"
+
+/* The typedef for rseg slot in the file copy */
+typedef byte trx_sysf_rseg_t;
+
+/* Rollback segment specification slot offsets */
+/*-------------------------------------------------------------*/
+#define	TRX_SYS_RSEG_SPACE	0	/* space where the segment
+					header is placed */
+#define	TRX_SYS_RSEG_PAGE_NO	4	/* page number where the segment
+					header is placed; this is FIL_NULL
+					if the slot is unused */
+/*-------------------------------------------------------------*/
+/* Size of a rollback segment specification slot */
+#define TRX_SYS_RSEG_SLOT_SIZE 8
+
+/*********************************************************************
+Writes the value of max_trx_id to the file based trx system header. */
+
+void
+trx_sys_flush_max_trx_id(void);
+/*==========================*/
+
+/*******************************************************************
+Checks if a page address is the trx sys header page. */
+UNIV_INLINE
+ibool
+trx_sys_hdr_page(
+/*=============*/
+ /* out: TRUE if trx sys header page */
+ ulint space, /* in: space */
+ ulint page_no)/* in: page number */
+{
+ if ((space == TRX_SYS_SPACE) && (page_no == TRX_SYS_PAGE_NO)) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/*******************************************************************
+Gets the pointer in the nth slot of the rseg array. */
+UNIV_INLINE
+trx_rseg_t*
+trx_sys_get_nth_rseg(
+/*=================*/
+ /* out: pointer to rseg object, NULL if slot
+ not in use */
+ trx_sys_t* sys, /* in: trx system */
+ ulint n) /* in: index of slot */
+{
+ ut_ad(mutex_own(&(kernel_mutex)));
+ ut_ad(n < TRX_SYS_N_RSEGS);
+
+ return(sys->rseg_array[n]);
+}
+
+/*******************************************************************
+Sets the pointer in the nth slot of the rseg array. */
+UNIV_INLINE
+void
+trx_sys_set_nth_rseg(
+/*=================*/
+ trx_sys_t* sys, /* in: trx system */
+ ulint n, /* in: index of slot */
+ trx_rseg_t* rseg) /* in: pointer to rseg object, NULL if slot
+ not in use */
+{
+ ut_ad(n < TRX_SYS_N_RSEGS);
+
+ sys->rseg_array[n] = rseg;
+}
+
+/**************************************************************************
+Gets a pointer to the transaction system header and x-latches its page. */
+UNIV_INLINE
+trx_sysf_t*
+trx_sysf_get(
+/*=========*/
+ /* out: pointer to system header, page x-latched. */
+ mtr_t* mtr) /* in: mtr */
+{
+ trx_sysf_t* header;
+
+ ut_ad(mutex_own(&(kernel_mutex)));
+ ut_ad(mtr);
+
+ header = TRX_SYS + buf_page_get(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
+ RW_X_LATCH, mtr);
+
+ buf_page_dbg_add_level(header, SYNC_TRX_SYS_HEADER);
+
+ return(header);
+}
+
+/*********************************************************************
+Gets the space of the nth rollback segment slot in the trx system
+file copy. */
+UNIV_INLINE
+ulint
+trx_sysf_rseg_get_space(
+/*====================*/
+ /* out: space id */
+ trx_sysf_t* sys_header, /* in: trx sys header */
+ ulint i, /* in: slot index == rseg id */
+ mtr_t* mtr) /* in: mtr */
+{
+ ut_ad(mutex_own(&(kernel_mutex)));
+ ut_ad(sys_header);
+ ut_ad(i < TRX_SYS_N_RSEGS);
+
+ return(mtr_read_ulint(sys_header + TRX_SYS_RSEGS
+ + i * TRX_SYS_RSEG_SLOT_SIZE
+ + TRX_SYS_RSEG_SPACE, MLOG_4BYTES, mtr));
+}
+
+/*********************************************************************
+Gets the page number of the nth rollback segment slot in the trx system
+header. */
+UNIV_INLINE
+ulint
+trx_sysf_rseg_get_page_no(
+/*======================*/
+ /* out: page number, FIL_NULL
+ if slot unused */
+ trx_sysf_t* sys_header, /* in: trx system header */
+ ulint i, /* in: slot index == rseg id */
+ mtr_t* mtr) /* in: mtr */
+{
+ ut_ad(sys_header);
+ ut_ad(mutex_own(&(kernel_mutex)));
+ ut_ad(i < TRX_SYS_N_RSEGS);
+
+ return(mtr_read_ulint(sys_header + TRX_SYS_RSEGS
+ + i * TRX_SYS_RSEG_SLOT_SIZE
+ + TRX_SYS_RSEG_PAGE_NO, MLOG_4BYTES, mtr));
+}
+
+/*********************************************************************
+Sets the space id of the nth rollback segment slot in the trx system
+file copy. */
+UNIV_INLINE
+void
+trx_sysf_rseg_set_space(
+/*====================*/
+ trx_sysf_t* sys_header, /* in: trx sys file copy */
+ ulint i, /* in: slot index == rseg id */
+ ulint space, /* in: space id */
+ mtr_t* mtr) /* in: mtr */
+{
+ ut_ad(mutex_own(&(kernel_mutex)));
+ ut_ad(sys_header);
+ ut_ad(i < TRX_SYS_N_RSEGS);
+
+ mlog_write_ulint(sys_header + TRX_SYS_RSEGS
+ + i * TRX_SYS_RSEG_SLOT_SIZE
+ + TRX_SYS_RSEG_SPACE,
+ space,
+ MLOG_4BYTES, mtr);
+}
+
+/*********************************************************************
+Sets the page number of the nth rollback segment slot in the trx system
+header. */
+UNIV_INLINE
+void
+trx_sysf_rseg_set_page_no(
+/*======================*/
+ trx_sysf_t* sys_header, /* in: trx sys header */
+ ulint i, /* in: slot index == rseg id */
+ ulint page_no, /* in: page number, FIL_NULL if the
+ slot is reset to unused */
+ mtr_t* mtr) /* in: mtr */
+{
+ ut_ad(mutex_own(&(kernel_mutex)));
+ ut_ad(sys_header);
+ ut_ad(i < TRX_SYS_N_RSEGS);
+
+ mlog_write_ulint(sys_header + TRX_SYS_RSEGS
+ + i * TRX_SYS_RSEG_SLOT_SIZE
+ + TRX_SYS_RSEG_PAGE_NO,
+ page_no,
+ MLOG_4BYTES, mtr);
+}
+
+/*********************************************************************
+Writes a trx id to an index page. In case that the id size changes in
+some future version, this function should be used instead of
+mach_write_... */
+UNIV_INLINE
+void
+trx_write_trx_id(
+/*=============*/
+ byte* ptr, /* in: pointer to memory where written */
+ dulint id) /* in: id */
+{
+ ut_ad(DATA_TRX_ID_LEN == 6);
+
+ mach_write_to_6(ptr, id);
+}
+
+/*********************************************************************
+Reads a trx id from an index page. In case that the id size changes in
+some future version, this function should be used instead of
+mach_read_... */
+UNIV_INLINE
+dulint
+trx_read_trx_id(
+/*============*/
+ /* out: id */
+ byte* ptr) /* in: pointer to memory from where to read */
+{
+ ut_ad(DATA_TRX_ID_LEN == 6);
+
+ return(mach_read_from_6(ptr));
+}
+
+/********************************************************************
+Looks for the trx handle with the given id in trx_list. */
+UNIV_INLINE
+trx_t*
+trx_get_on_id(
+/*==========*/
+ /* out: the trx handle or NULL if not found */
+ dulint trx_id) /* in: trx id to search for */
+{
+ trx_t* trx;
+
+ ut_ad(mutex_own(&(kernel_mutex)));
+
+ trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
+
+ while (trx != NULL) {
+ if (0 == ut_dulint_cmp(trx_id, trx->id)) {
+
+ return(trx);
+ }
+
+ trx = UT_LIST_GET_NEXT(trx_list, trx);
+ }
+
+ return(NULL);
+}
+
+/********************************************************************
+Returns the minimum trx id in trx list. This is the smallest id for which
+the trx can possibly be active. (But, you must look at the trx->conc_state to
+find out if the minimum trx id transaction itself is active, or already
+committed.) */
+UNIV_INLINE
+dulint
+trx_list_get_min_trx_id(void)
+/*=========================*/
+ /* out: the minimum trx id, or trx_sys->max_trx_id
+ if the trx list is empty */
+{
+ trx_t* trx;
+
+ ut_ad(mutex_own(&(kernel_mutex)));
+
+ trx = UT_LIST_GET_LAST(trx_sys->trx_list);
+
+ if (trx == NULL) {
+
+ return(trx_sys->max_trx_id);
+ }
+
+ return(trx->id);
+}
+
+/********************************************************************
+Checks if a transaction with the given id is active. */
+UNIV_INLINE
+ibool
+trx_is_active(
+/*==========*/
+ /* out: TRUE if active */
+ dulint trx_id) /* in: trx id of the transaction */
+{
+ trx_t* trx;
+
+ ut_ad(mutex_own(&(kernel_mutex)));
+
+ if (ut_dulint_cmp(trx_id, trx_list_get_min_trx_id()) < 0) {
+
+ return(FALSE);
+ }
+
+ trx = trx_get_on_id(trx_id);
+ if (trx && (trx->conc_state == TRX_ACTIVE)) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/*********************************************************************
+Allocates a new transaction id. */
+UNIV_INLINE
+dulint
+trx_sys_get_new_trx_id(void)
+/*========================*/
+ /* out: new, allocated trx id */
+{
+ dulint id;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+	/* VERY important: after the database is started, the max_trx_id value
+	is divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, so the following if
+	will evaluate to TRUE the first time this function is called after
+	startup, and the trx id value will be written to the disk-based
+	header! Thus trx id values will not overlap when the database is
+	repeatedly started! */
+
+ if (ut_dulint_get_low(trx_sys->max_trx_id)
+ % TRX_SYS_TRX_ID_WRITE_MARGIN == 0) {
+
+ trx_sys_flush_max_trx_id();
+ }
+
+ id = trx_sys->max_trx_id;
+
+ UT_DULINT_INC(trx_sys->max_trx_id);
+
+ return(id);
+}
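+
+/* Worked example of the write margin logic above, using the value
+TRX_SYS_TRX_ID_WRITE_MARGIN == 256 defined in trx0sys.h: if the value last
+flushed to TRX_SYS_TRX_ID_STORE was 1024, then every trx id handed out
+before a crash was below 1024 + 256, because reaching 1280 would have
+triggered another flush. Assuming the startup code resumes id assignment
+from the stored value rounded up to the next multiple of the margin plus
+one extra margin (1024 + 2 * 256 = 1536 here), ids assigned after the
+restart cannot collide with ids assigned before it. */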
+
+/*********************************************************************
+Allocates a new transaction number. */
+UNIV_INLINE
+dulint
+trx_sys_get_new_trx_no(void)
+/*========================*/
+ /* out: new, allocated trx number */
+{
+ ut_ad(mutex_own(&kernel_mutex));
+
+ return(trx_sys_get_new_trx_id());
+}
diff --git a/innobase/include/trx0trx.h b/innobase/include/trx0trx.h
new file mode 100644
index 00000000000..e2a1b4435e7
--- /dev/null
+++ b/innobase/include/trx0trx.h
@@ -0,0 +1,412 @@
+/******************************************************
+The transaction
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef trx0trx_h
+#define trx0trx_h
+
+#include "univ.i"
+#include "trx0types.h"
+#include "lock0types.h"
+#include "usr0types.h"
+#include "que0types.h"
+#include "mem0mem.h"
+#include "read0types.h"
+
+/* If this flag is defined, then unneeded update undo logs are discarded,
+saving CPU time. The kernel mutex contention is increased, however. */
+
+#define TRX_UPDATE_UNDO_OPT
+
+extern ulint trx_n_mysql_transactions;
+
+/********************************************************************
+Creates and initializes a transaction object. */
+
+trx_t*
+trx_create(
+/*=======*/
+ /* out, own: the transaction */
+ sess_t* sess); /* in: session or NULL */
+/************************************************************************
+Creates a transaction object for MySQL. */
+
+trx_t*
+trx_allocate_for_mysql(void);
+/*========================*/
+ /* out, own: transaction object */
+/************************************************************************
+Frees a transaction object. */
+
+void
+trx_free(
+/*=====*/
+ trx_t* trx); /* in, own: trx object */
+/************************************************************************
+Frees a transaction object for MySQL. */
+
+void
+trx_free_for_mysql(
+/*===============*/
+ trx_t* trx); /* in, own: trx object */
+/********************************************************************
+Creates trx objects for transactions and initializes the trx list of
+trx_sys at database start. Rollback segment and undo log lists must
+already exist when this function is called, because the lists of
+transactions to be rolled back or cleaned up are built based on the
+undo log lists. */
+
+void
+trx_lists_init_at_db_start(void);
+/*============================*/
+/********************************************************************
+Starts a new transaction. */
+
+ibool
+trx_start(
+/*======*/
+ /* out: TRUE if success, FALSE if the rollback
+ segment could not support this many transactions */
+ trx_t* trx, /* in: transaction */
+ ulint rseg_id);/* in: rollback segment id; if ULINT_UNDEFINED
+ is passed, the system chooses the rollback segment
+ automatically in a round-robin fashion */
+/********************************************************************
+Starts a new transaction. */
+
+ibool
+trx_start_low(
+/*==========*/
+ /* out: TRUE */
+ trx_t* trx, /* in: transaction */
+ ulint rseg_id);/* in: rollback segment id; if ULINT_UNDEFINED
+ is passed, the system chooses the rollback segment
+ automatically in a round-robin fashion */
+/*****************************************************************
+Starts the transaction if it is not yet started. */
+UNIV_INLINE
+void
+trx_start_if_not_started(
+/*=====================*/
+ trx_t* trx); /* in: transaction */
+/********************************************************************
+Commits a transaction. */
+
+void
+trx_commit_off_kernel(
+/*==================*/
+ trx_t* trx); /* in: transaction */
+/**************************************************************************
+Does the transaction commit for MySQL. */
+
+ulint
+trx_commit_for_mysql(
+/*=================*/
+ /* out: 0 or error number */
+ trx_t* trx); /* in: trx handle */
+/**************************************************************************
+Marks the latest SQL statement ended. */
+
+void
+trx_mark_sql_stat_end(
+/*==================*/
+ trx_t* trx); /* in: trx handle */
+/************************************************************************
+Assigns a read view for a consistent read query. All the consistent reads
+within the same transaction will get the same read view, which is created
+when this function is first called for a newly started transaction. */
+
+read_view_t*
+trx_assign_read_view(
+/*=================*/
+ /* out: consistent read view */
+ trx_t* trx); /* in: active transaction */
+/***************************************************************
+The transaction must be in the TRX_QUE_LOCK_WAIT state. Puts it to
+the TRX_QUE_RUNNING state and releases query threads which were
+waiting for a lock in the wait_thrs list. */
+
+void
+trx_end_lock_wait(
+/*==============*/
+ trx_t* trx); /* in: transaction */
+/********************************************************************
+Sends a signal to a trx object. */
+
+ibool
+trx_sig_send(
+/*=========*/
+ /* out: TRUE if the signal was
+ successfully delivered */
+ trx_t* trx, /* in: trx handle */
+ ulint type, /* in: signal type */
+ ulint sender, /* in: TRX_SIG_SELF or
+ TRX_SIG_OTHER_SESS */
+ ibool reply, /* in: TRUE if the sender of the signal
+ wants reply after the operation induced
+ by the signal is completed; if type
+ is TRX_SIG_END_WAIT, this must be
+ FALSE */
+ que_thr_t* receiver_thr, /* in: query thread which wants the
+ reply, or NULL */
+ trx_savept_t* savept, /* in: possible rollback savepoint, or
+ NULL */
+ que_thr_t** next_thr); /* in/out: next query thread to run;
+ if the value which is passed in is
+ a pointer to a NULL pointer, then the
+ calling function can start running
+ a new query thread; if the parameter
+ is NULL, it is ignored */
+/********************************************************************
+Send the reply message when a signal in the queue of the trx has
+been handled. */
+
+void
+trx_sig_reply(
+/*==========*/
+ trx_t* trx, /* in: trx handle */
+ trx_sig_t* sig, /* in: signal */
+ que_thr_t** next_thr); /* in/out: next query thread to run;
+ if the value which is passed in is
+ a pointer to a NULL pointer, then the
+ calling function can start running
+ a new query thread */
+/********************************************************************
+Removes the signal object from a trx signal queue. */
+
+void
+trx_sig_remove(
+/*===========*/
+ trx_t* trx, /* in: trx handle */
+ trx_sig_t* sig); /* in, own: signal */
+/********************************************************************
+Starts handling of a trx signal. */
+
+void
+trx_sig_start_handle(
+/*=================*/
+ trx_t* trx, /* in: trx handle */
+ que_thr_t** next_thr); /* in/out: next query thread to run;
+ if the value which is passed in is
+ a pointer to a NULL pointer, then the
+ calling function can start running
+ a new query thread */
+/********************************************************************
+Ends signal handling. If the session is in the error state, and
+trx->graph_before_signal_handling != NULL, returns control to the error
+handling routine of the graph (currently only returns the control to the
+graph root which then sends an error message to the client). */
+
+void
+trx_end_signal_handling(
+/*====================*/
+ trx_t* trx); /* in: trx */
+/*************************************************************************
+Creates a commit command node struct. */
+
+commit_node_t*
+commit_node_create(
+/*===============*/
+ /* out, own: commit node struct */
+ mem_heap_t* heap); /* in: mem heap where created */
+/***************************************************************
+Performs an execution step for a commit type node in a query graph. */
+
+que_thr_t*
+trx_commit_step(
+/*============*/
+ /* out: query thread to run next, or NULL */
+ que_thr_t* thr); /* in: query thread */
+
+
+/* Signal to a transaction */
+struct trx_sig_struct{
+ ulint type; /* signal type */
+ ulint state; /* TRX_SIG_WAITING or
+ TRX_SIG_BEING_HANDLED */
+ ulint sender; /* TRX_SIG_SELF or
+ TRX_SIG_OTHER_SESS */
+ ibool reply; /* TRUE if the sender of the signal
+ wants reply after the operation induced
+ by the signal is completed; if this
+ field is TRUE and the receiver field
+ below is NULL, then a SUCCESS message
+ is sent to the client of the session
+ to which this trx belongs */
+ que_thr_t* receiver; /* query thread which wants the reply,
+ or NULL */
+ trx_savept_t savept; /* possible rollback savepoint */
+ UT_LIST_NODE_T(trx_sig_t)
+ signals; /* queue of pending signals to the
+ transaction */
+ UT_LIST_NODE_T(trx_sig_t)
+ reply_signals; /* list of signals for which the sender
+				transaction is waiting for a reply */
+};
+
+/* The transaction handle; every session has a trx object which is freed only
+when the session is freed; in addition there may be session-less transactions
+rolling back after a database recovery */
+
+struct trx_struct{
+ /* All the next fields are protected by the kernel mutex, except the
+ undo logs which are protected by undo_mutex */
+ ulint type; /* TRX_USER, TRX_PURGE */
+ ulint conc_state; /* state of the trx from the point
+ of view of concurrency control:
+ TRX_ACTIVE, TRX_COMMITTED_IN_MEMORY,
+ ... */
+ dulint id; /* transaction id */
+ dulint no; /* transaction serialization number ==
+ max trx id when the transaction is
+ moved to COMMITTED_IN_MEMORY state */
+ ibool dict_operation; /* TRUE if the trx is used to create
+ a table, create an index, or drop a
+ table */
+ dulint table_id; /* table id if the preceding field is
+ TRUE */
+ os_thread_id_t mysql_thread_id;/* id of the MySQL thread associated
+ with this transaction object */
+ ulint n_mysql_tables_in_use; /* number of Innobase tables
+ used in the processing of the current
+ SQL statement in MySQL */
+ UT_LIST_NODE_T(trx_t)
+ trx_list; /* list of transactions */
+ /*------------------------------*/
+ mutex_t undo_mutex; /* mutex protecting the fields in this
+ section (down to undo_no_arr), EXCEPT
+ last_sql_stat_start, which can be
+ accessed only when we know that there
+ cannot be any activity in the undo
+ logs! */
+ dulint undo_no; /* next undo log record number to
+ assign */
+ trx_savept_t last_sql_stat_start;
+ /* undo_no when the last sql statement
+ was started: in case of an error, trx
+ is rolled back down to this undo
+ number; see note at undo_mutex! */
+ trx_rseg_t* rseg; /* rollback segment assigned to the
+ transaction, or NULL if not assigned
+ yet */
+ trx_undo_t* insert_undo; /* pointer to the insert undo log, or
+ NULL if no inserts performed yet */
+ trx_undo_t* update_undo; /* pointer to the update undo log, or
+ NULL if no update performed yet */
+ dulint roll_limit; /* least undo number to undo during
+ a rollback */
+ ulint pages_undone; /* number of undo log pages undone
+ since the last undo log truncation */
+ trx_undo_arr_t* undo_no_arr; /* array of undo numbers of undo log
+ records which are currently processed
+ by a rollback operation */
+ /*------------------------------*/
+ ulint error_state; /* 0 if no error, otherwise error
+ number */
+ sess_t* sess; /* session of the trx, NULL if none */
+ ulint que_state; /* TRX_QUE_RUNNING, TRX_QUE_LOCK_WAIT,
+ ... */
+ que_t* graph; /* query currently run in the session,
+ or NULL if none; NOTE that the query
+ belongs to the session, and it can
+ survive over a transaction commit, if
+ it is a stored procedure with a COMMIT
+ WORK statement, for instance */
+ ulint n_active_thrs; /* number of active query threads */
+ ibool handling_signals;/* this is TRUE as long as the trx
+ is handling signals */
+ que_t* graph_before_signal_handling;
+ /* value of graph when signal handling
+ for this trx started: this is used to
+ return control to the original query
+ graph for error processing */
+ trx_sig_t sig; /* one signal object can be allocated
+ in this space, avoiding mem_alloc */
+ UT_LIST_BASE_NODE_T(trx_sig_t)
+ signals; /* queue of processed or pending
+ signals to the trx */
+ UT_LIST_BASE_NODE_T(trx_sig_t)
+ reply_signals; /* list of signals sent by the query
+ threads of this trx for which a thread
+ is waiting for a reply; if this trx is
+ killed, the reply requests in the list
+ must be canceled */
+ /*------------------------------*/
+ lock_t* wait_lock; /* if trx execution state is
+ TRX_QUE_LOCK_WAIT, this points to
+ the lock request, otherwise this is
+ NULL */
+ UT_LIST_BASE_NODE_T(que_thr_t)
+ wait_thrs; /* query threads belonging to this
+ trx that are in the QUE_THR_LOCK_WAIT
+ state */
+ /*------------------------------*/
+ mem_heap_t* lock_heap; /* memory heap for the locks of the
+ transaction; protected by
+ lock_heap_mutex */
+ UT_LIST_BASE_NODE_T(lock_t)
+ trx_locks; /* locks reserved by the transaction;
+ protected by lock_heap_mutex */
+ /*------------------------------*/
+ mem_heap_t* read_view_heap; /* memory heap for the read view */
+ read_view_t* read_view; /* consistent read view or NULL */
+};
+
+#define TRX_MAX_N_THREADS 32 /* maximum number of concurrent
+ threads running a single operation of
+ a transaction, e.g., a parallel query */
+/* Transaction types */
+#define TRX_USER 1 /* normal user transaction */
+#define TRX_PURGE 2 /* purge transaction: this is not
+ inserted to the trx list of trx_sys
+ and no rollback segment is assigned to
+ this */
+/* Transaction concurrency states */
+#define TRX_NOT_STARTED 1
+#define TRX_ACTIVE 2
+#define TRX_COMMITTED_IN_MEMORY 3
+
+/* Transaction execution states when trx state is TRX_ACTIVE */
+#define TRX_QUE_RUNNING 1 /* transaction is running */
+#define TRX_QUE_LOCK_WAIT 2 /* transaction is waiting for a lock */
+#define TRX_QUE_ROLLING_BACK 3 /* transaction is rolling back */
+#define TRX_QUE_COMMITTING 4 /* transaction is committing */
+
+/* Types of a trx signal */
+#define TRX_SIG_NO_SIGNAL 100
+#define TRX_SIG_TOTAL_ROLLBACK 1
+#define TRX_SIG_ROLLBACK_TO_SAVEPT 2
+#define TRX_SIG_COMMIT 3
+#define TRX_SIG_ERROR_OCCURRED 4
+#define TRX_SIG_BREAK_EXECUTION 5
+
+/* Sender types of a signal */
+#define TRX_SIG_SELF 1 /* sent by the session itself, or
+ by an error occurring within this
+ session */
+#define TRX_SIG_OTHER_SESS 2 /* sent by another session (which
+ must hold rights to this) */
+/* Signal states */
+#define TRX_SIG_WAITING 1
+#define TRX_SIG_BEING_HANDLED 2
+
+/* Commit command node in a query graph */
+struct commit_node_struct{
+ que_common_t common; /* node type: QUE_NODE_COMMIT */
+ ulint state; /* node execution state */
+};
+
+/* Commit node states */
+#define COMMIT_NODE_SEND 1
+#define COMMIT_NODE_WAIT 2
+
+
+#ifndef UNIV_NONINL
+#include "trx0trx.ic"
+#endif
+
+#endif
diff --git a/innobase/include/trx0trx.ic b/innobase/include/trx0trx.ic
new file mode 100644
index 00000000000..9d453047600
--- /dev/null
+++ b/innobase/include/trx0trx.ic
@@ -0,0 +1,23 @@
+/******************************************************
+The transaction
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+/*****************************************************************
+Starts the transaction if it is not yet started. */
+UNIV_INLINE
+void
+trx_start_if_not_started(
+/*=====================*/
+ trx_t* trx) /* in: transaction */
+{
+ ut_ad(trx->conc_state != TRX_COMMITTED_IN_MEMORY);
+
+ if (trx->conc_state == TRX_NOT_STARTED) {
+
+ trx_start(trx, ULINT_UNDEFINED);
+ }
+}
diff --git a/innobase/include/trx0types.h b/innobase/include/trx0types.h
new file mode 100644
index 00000000000..02da1605077
--- /dev/null
+++ b/innobase/include/trx0types.h
@@ -0,0 +1,43 @@
+/******************************************************
+Transaction system global type definitions
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef trx0types_h
+#define trx0types_h
+
+#include "lock0types.h"
+#include "ut0byte.h"
+
+/* Memory objects */
+typedef struct trx_struct trx_t;
+typedef struct trx_sys_struct trx_sys_t;
+typedef struct trx_sig_struct trx_sig_t;
+typedef struct trx_rseg_struct trx_rseg_t;
+typedef struct trx_undo_struct trx_undo_t;
+typedef struct trx_undo_arr_struct trx_undo_arr_t;
+typedef struct trx_undo_inf_struct trx_undo_inf_t;
+typedef struct trx_purge_struct trx_purge_t;
+typedef struct roll_node_struct roll_node_t;
+typedef struct commit_node_struct commit_node_t;
+
+/* Transaction savepoint */
+typedef struct trx_savept_struct trx_savept_t;
+struct trx_savept_struct{
+ dulint least_undo_no; /* least undo number to undo */
+};
+
+/* File objects */
+typedef byte trx_sysf_t;
+typedef byte trx_rsegf_t;
+typedef byte trx_usegf_t;
+typedef byte trx_ulogf_t;
+typedef byte trx_upagef_t;
+
+/* Undo log record */
+typedef byte trx_undo_rec_t;
+
+#endif
diff --git a/innobase/include/trx0undo.h b/innobase/include/trx0undo.h
new file mode 100644
index 00000000000..82c21f756e6
--- /dev/null
+++ b/innobase/include/trx0undo.h
@@ -0,0 +1,473 @@
+/******************************************************
+Transaction undo log
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef trx0undo_h
+#define trx0undo_h
+
+#include "univ.i"
+#include "trx0types.h"
+#include "mtr0mtr.h"
+#include "trx0sys.h"
+#include "page0types.h"
+
+/***************************************************************************
+Builds a roll pointer dulint. */
+UNIV_INLINE
+dulint
+trx_undo_build_roll_ptr(
+/*====================*/
+ /* out: roll pointer */
+ ibool is_insert, /* in: TRUE if insert undo log */
+ ulint rseg_id, /* in: rollback segment id */
+ ulint page_no, /* in: page number */
+ ulint offset); /* in: offset of the undo entry within page */
+/***************************************************************************
+Decodes a roll pointer dulint. */
+UNIV_INLINE
+void
+trx_undo_decode_roll_ptr(
+/*=====================*/
+ dulint roll_ptr, /* in: roll pointer */
+ ibool* is_insert, /* out: TRUE if insert undo log */
+ ulint* rseg_id, /* out: rollback segment id */
+ ulint* page_no, /* out: page number */
+ ulint* offset); /* out: offset of the undo entry within page */
+/***************************************************************************
+Returns TRUE if the roll pointer is of the insert type. */
+UNIV_INLINE
+ibool
+trx_undo_roll_ptr_is_insert(
+/*========================*/
+ /* out: TRUE if insert undo log */
+ dulint roll_ptr); /* in: roll pointer */
+/*********************************************************************
+Writes a roll ptr to an index page. In case that the size changes in
+some future version, this function should be used instead of
+mach_write_... */
+UNIV_INLINE
+void
+trx_write_roll_ptr(
+/*===============*/
+ byte* ptr, /* in: pointer to memory where written */
+ dulint roll_ptr); /* in: roll ptr */
+/*********************************************************************
+Reads a roll ptr from an index page. In case that the roll ptr size
+changes in some future version, this function should be used instead of
+mach_read_... */
+UNIV_INLINE
+dulint
+trx_read_roll_ptr(
+/*==============*/
+ /* out: roll ptr */
+ byte* ptr); /* in: pointer to memory from where to read */
+/**********************************************************************
+Gets an undo log page and x-latches it. */
+UNIV_INLINE
+page_t*
+trx_undo_page_get(
+/*===============*/
+ /* out: pointer to page x-latched */
+ ulint space, /* in: space where placed */
+ ulint page_no, /* in: page number */
+ mtr_t* mtr); /* in: mtr */
+/**********************************************************************
+Gets an undo log page and s-latches it. */
+UNIV_INLINE
+page_t*
+trx_undo_page_get_s_latched(
+/*=========================*/
+ /* out: pointer to page s-latched */
+ ulint space, /* in: space where placed */
+ ulint page_no, /* in: page number */
+ mtr_t* mtr); /* in: mtr */
+/**********************************************************************
+Returns the previous undo record on the page in the specified log, or
+NULL if none exists. */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_page_get_prev_rec(
+/*=======================*/
+ /* out: pointer to record, NULL if none */
+ trx_undo_rec_t* rec, /* in: undo log record */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset); /* in: undo log header offset on page */
+/**********************************************************************
+Returns the next undo log record on the page in the specified log, or
+NULL if none exists. */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_page_get_next_rec(
+/*=======================*/
+ /* out: pointer to record, NULL if none */
+ trx_undo_rec_t* rec, /* in: undo log record */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset); /* in: undo log header offset on page */
+/**********************************************************************
+Returns the last undo record on the page in the specified undo log, or
+NULL if none exists. */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_page_get_last_rec(
+/*=======================*/
+ /* out: pointer to record, NULL if none */
+ page_t* undo_page,/* in: undo log page */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset); /* in: undo log header offset on page */
+/**********************************************************************
+Returns the first undo record on the page in the specified undo log, or
+NULL if none exists. */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_page_get_first_rec(
+/*========================*/
+ /* out: pointer to record, NULL if none */
+ page_t* undo_page,/* in: undo log page */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset); /* in: undo log header offset on page */
+/***************************************************************************
+Gets the previous record in an undo log. */
+
+trx_undo_rec_t*
+trx_undo_get_prev_rec(
+/*==================*/
+ /* out: undo log record, the page s-latched,
+ NULL if none */
+ trx_undo_rec_t* rec, /* in: undo record */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset, /* in: undo log header offset on page */
+ mtr_t* mtr); /* in: mtr */
+/***************************************************************************
+Gets the next record in an undo log. */
+
+trx_undo_rec_t*
+trx_undo_get_next_rec(
+/*==================*/
+ /* out: undo log record, the page s-latched,
+ NULL if none */
+ trx_undo_rec_t* rec, /* in: undo record */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset, /* in: undo log header offset on page */
+ mtr_t* mtr); /* in: mtr */
+/***************************************************************************
+Gets the first record in an undo log. */
+
+trx_undo_rec_t*
+trx_undo_get_first_rec(
+/*===================*/
+ /* out: undo log record, the page latched, NULL if
+ none */
+ ulint space, /* in: undo log header space */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset, /* in: undo log header offset on page */
+ ulint mode, /* in: latching mode: RW_S_LATCH or RW_X_LATCH */
+ mtr_t* mtr); /* in: mtr */
+/************************************************************************
+Tries to add a page to the undo log segment where the undo log is placed. */
+
+ulint
+trx_undo_add_page(
+/*==============*/
+ /* out: page number if success, else
+ FIL_NULL */
+ trx_t* trx, /* in: transaction */
+ trx_undo_t* undo, /* in: undo log memory object */
+ mtr_t* mtr); /* in: mtr which does not have a latch to any
+ undo log page; the caller must have reserved
+ the rollback segment mutex */
+/***************************************************************************
+Truncates an undo log from the end. This function is used during a rollback
+to free space from an undo log. */
+
+void
+trx_undo_truncate_end(
+/*==================*/
+ trx_t* trx, /* in: transaction whose undo log it is */
+ trx_undo_t* undo, /* in: undo log */
+ dulint limit); /* in: all undo records with undo number
+ >= this value should be truncated */
+/***************************************************************************
+Truncates an undo log from the start. This function is used during a purge
+operation. */
+
+void
+trx_undo_truncate_start(
+/*====================*/
+ trx_rseg_t* rseg, /* in: rollback segment */
+ ulint space, /* in: space id of the log */
+ ulint hdr_page_no, /* in: header page number */
+ ulint hdr_offset, /* in: header offset on the page */
+ dulint limit); /* in: all undo pages with undo numbers <
+ this value should be truncated; NOTE that
+ the function only frees whole pages; the
+ header page is not freed, but emptied, if
+ all the records there are < limit */
+/************************************************************************
+Initializes the undo log lists for a rollback segment memory copy.
+This function is only called when the database is started or a new
+rollback segment is created. */
+
+ulint
+trx_undo_lists_init(
+/*================*/
+ /* out: the combined size of undo log segments
+ in pages */
+ trx_rseg_t* rseg); /* in: rollback segment memory object */
+/**************************************************************************
+Assigns an undo log for a transaction. A new undo log is created or a cached
+undo log reused. */
+
+trx_undo_t*
+trx_undo_assign_undo(
+/*=================*/
+ /* out: the undo log, NULL if did not succeed: out of
+ space */
+ trx_t* trx, /* in: transaction */
+ ulint type); /* in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
+/**********************************************************************
+Sets the state of the undo log segment at a transaction finish. */
+
+page_t*
+trx_undo_set_state_at_finish(
+/*=========================*/
+ /* out: undo log segment header page,
+ x-latched */
+ trx_t* trx, /* in: transaction */
+ trx_undo_t* undo, /* in: undo log memory copy */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************************
+Adds the update undo log header as the first in the history list, and
+frees the memory object, or puts it to the list of cached update undo log
+segments. */
+
+void
+trx_undo_update_cleanup(
+/*====================*/
+ trx_t* trx, /* in: trx owning the update undo log */
+ page_t* undo_page, /* in: update undo log header page,
+ x-latched */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************************
+Discards an undo log and puts the segment to the list of cached update undo
+log segments. This optimized function is called if there is no need to
+keep the update undo log because there exist no read views and the transaction
+made no delete markings, which would make purge necessary. We restrict this
+to undo logs of size 1 to make things simpler. */
+
+dulint
+trx_undo_update_cleanup_by_discard(
+/*===============================*/
+ /* out: log sequence number at which mtr is
+ committed */
+ trx_t* trx, /* in: trx owning the update undo log */
+ mtr_t* mtr); /* in: mtr */
+/**********************************************************************
+Frees or caches an insert undo log after a transaction commit or rollback.
+Knowledge of inserts is not needed after a commit or rollback, therefore
+the data can be discarded. */
+
+void
+trx_undo_insert_cleanup(
+/*====================*/
+ trx_t* trx); /* in: transaction handle */
+/***************************************************************
+Parses the redo log entry of an undo log page initialization. */
+
+byte*
+trx_undo_parse_page_init(
+/*======================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
+/***************************************************************
+Parses the redo log entry of an undo log page header create or reuse. */
+
+byte*
+trx_undo_parse_page_header(
+/*=======================*/
+ /* out: end of log record or NULL */
+ ulint type, /* in: MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
+/***************************************************************
+Parses the redo log entry of an undo log page header discard. */
+
+byte*
+trx_undo_parse_discard_latest(
+/*==========================*/
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
+
+
+/* Types of an undo log segment */
+#define TRX_UNDO_INSERT 1 /* contains undo entries for inserts */
+#define TRX_UNDO_UPDATE 2 /* contains undo entries for updates
+ and delete markings: in short,
+					modifications (the name 'UPDATE' is a
+ historical relic) */
+/* States of an undo log segment */
+#define TRX_UNDO_ACTIVE 1 /* contains an undo log of an active
+ transaction */
+#define TRX_UNDO_CACHED 2 /* cached for quick reuse */
+#define TRX_UNDO_TO_FREE 3 /* insert undo segment can be freed */
+#define TRX_UNDO_TO_PURGE 4 /* update undo segment will not be
+ reused: it can be freed in purge when
+ all undo data in it is removed */
+
+/* Transaction undo log memory object; this is protected by the undo_mutex
+in the corresponding transaction object */
+
+struct trx_undo_struct{
+ /*-----------------------------*/
+ ulint id; /* undo log slot number within the
+ rollback segment */
+ ulint type; /* TRX_UNDO_INSERT or
+ TRX_UNDO_UPDATE */
+ ulint state; /* state of the corresponding undo log
+ segment */
+ ibool del_marks; /* relevant only in an update undo log:
+ this is TRUE if the transaction may
+ have delete marked records, because of
+ a delete of a row or an update of an
+ indexed field; purge is then
+ necessary. */
+ dulint trx_id; /* id of the trx assigned to the undo
+ log */
+ ibool dict_operation; /* TRUE if a dict operation trx */
+ dulint table_id; /* if a dict operation, then the table
+ id */
+ trx_rseg_t* rseg; /* rseg where the undo log belongs */
+ /*-----------------------------*/
+ ulint space; /* space id where the undo log
+ placed */
+ ulint hdr_page_no; /* page number of the header page in
+ the undo log */
+ ulint hdr_offset; /* header offset of the undo log on the
+ page */
+ ulint last_page_no; /* page number of the last page in the
+ undo log; this may differ from
+ top_page_no during a rollback */
+ ulint size; /* current size in pages */
+ /*-----------------------------*/
+ ulint empty; /* TRUE if the stack of undo log
+ records is currently empty */
+ ulint top_page_no; /* page number where the latest undo
+ log record was catenated; during
+ rollback the page from which the latest
+ undo record was chosen */
+ ulint top_offset; /* offset of the latest undo record,
+ i.e., the topmost element in the undo
+ log if we think of it as a stack */
+ dulint top_undo_no; /* undo number of the latest record */
+ page_t* guess_page; /* guess for the buffer frame where
+ the top page might reside */
+ /*-----------------------------*/
+ UT_LIST_NODE_T(trx_undo_t) undo_list;
+ /* undo log objects in the rollback
+ segment are chained into lists */
+};
+
+/* The offset of the undo log page header on pages of the undo log */
+#define TRX_UNDO_PAGE_HDR FSEG_PAGE_DATA
+/*-------------------------------------------------------------*/
+/* Transaction undo log page header offsets */
+#define TRX_UNDO_PAGE_TYPE 0 /* TRX_UNDO_INSERT or
+ TRX_UNDO_UPDATE */
+#define TRX_UNDO_PAGE_START 2 /* Byte offset where the undo log
+ records for the LATEST transaction
+ start on this page (remember that
+ in an update undo log, the first page
+ can contain several undo logs) */
+#define TRX_UNDO_PAGE_FREE 4 /* On each page of the undo log this
+ field contains the byte offset of the
+ first free byte on the page */
+#define TRX_UNDO_PAGE_NODE 6 /* The file list node in the chain
+ of undo log pages */
+/*-------------------------------------------------------------*/
+#define TRX_UNDO_PAGE_HDR_SIZE (6 + FLST_NODE_SIZE)
+
+/* An update undo segment with just one page can be reused if it has
+less than this number of bytes used */
+
+#define TRX_UNDO_PAGE_REUSE_LIMIT (3 * UNIV_PAGE_SIZE / 4)
+
+/* An update undo log segment may contain several undo logs on its first page
+if the undo logs took so little space that the segment could be cached and
+reused. All the undo log headers are then on the first page, and the last one
+owns the undo log records on subsequent pages if the segment is bigger than
+one page. If an undo log is stored in a segment, then on the first page it is
+allowed to have zero undo records, but if the segment extends to several
+pages, then all the rest of the pages must contain at least one undo log
+record. */
+
+/* The offset of the undo log segment header on the first page of the undo
+log segment */
+
+#define TRX_UNDO_SEG_HDR (TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE)
+/*-------------------------------------------------------------*/
+#define TRX_UNDO_STATE 0 /* TRX_UNDO_ACTIVE, ... */
+#define TRX_UNDO_LAST_LOG 2 /* Offset of the last undo log header
+ on the segment header page, 0 if
+ none */
+#define TRX_UNDO_FSEG_HEADER 4 /* Header for the file segment which
+ the undo log segment occupies */
+#define TRX_UNDO_PAGE_LIST (4 + FSEG_HEADER_SIZE)
+ /* Base node for the list of pages in
+ the undo log segment; defined only on
+ the undo log segment's first page */
+/*-------------------------------------------------------------*/
+/* Size of the undo log segment header */
+#define TRX_UNDO_SEG_HDR_SIZE (4 + FSEG_HEADER_SIZE + FLST_BASE_NODE_SIZE)
+
+
+/* The undo log header. There can be several undo log headers on the first
+page of an update undo log segment. */
+/*-------------------------------------------------------------*/
+#define TRX_UNDO_TRX_ID 0 /* Transaction id */
+#define TRX_UNDO_TRX_NO 8 /* Transaction number of the
+ transaction; defined only if the log
+ is in a history list */
+#define TRX_UNDO_DEL_MARKS 16 /* Defined only in an update undo
+ log: TRUE if the transaction may have
+ done delete markings of records, and
+ thus purge is necessary */
+#define TRX_UNDO_LOG_START 18 /* Offset of the first undo log record
+ of this log on the header page; purge
+					may remove undo log records from the
+					log start, and therefore this is not
+					necessarily the same as the end offset
+					of this log header */
+#define TRX_UNDO_DICT_OPERATION 20 /* TRUE if the transaction is a table
+ create, index create, or drop
+ transaction: in recovery
+ the transaction cannot be rolled back
+ in the usual way: a 'rollback' rather
+ means dropping the created or dropped
+ table, if it still exists */
+#define TRX_UNDO_TABLE_ID 22 /* Id of the table if the preceding
+ field is TRUE */
+#define TRX_UNDO_NEXT_LOG 30 /* Offset of the next undo log header
+ on this page, 0 if none */
+#define TRX_UNDO_PREV_LOG 32 /* Offset of the previous undo log
+ header on this page, 0 if none */
+#define TRX_UNDO_HISTORY_NODE 34 /* If the log is put to the history
+ list, the file list node is here */
+/*-------------------------------------------------------------*/
+#define TRX_UNDO_LOG_HDR_SIZE (34 + FLST_NODE_SIZE)
+
+#ifndef UNIV_NONINL
+#include "trx0undo.ic"
+#endif
+
+#endif
diff --git a/innobase/include/trx0undo.ic b/innobase/include/trx0undo.ic
new file mode 100644
index 00000000000..bedbc02b00b
--- /dev/null
+++ b/innobase/include/trx0undo.ic
@@ -0,0 +1,319 @@
+/******************************************************
+Transaction undo log
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "data0type.h"
+
+/***************************************************************************
+Builds a roll pointer dulint. */
+UNIV_INLINE
+dulint
+trx_undo_build_roll_ptr(
+/*====================*/
+ /* out: roll pointer */
+ ibool is_insert, /* in: TRUE if insert undo log */
+ ulint rseg_id, /* in: rollback segment id */
+ ulint page_no, /* in: page number */
+ ulint offset) /* in: offset of the undo entry within page */
+{
+ ut_ad(DATA_ROLL_PTR_LEN == 7);
+ ut_ad(rseg_id < 128);
+
+ return(ut_dulint_create(is_insert * 128 * 256 * 256
+ + rseg_id * 256 * 256
+ + (page_no / 256) / 256,
+ (page_no % (256 * 256)) * 256 * 256
+ + offset));
+}
+
+/***************************************************************************
+Decodes a roll pointer dulint. */
+UNIV_INLINE
+void
+trx_undo_decode_roll_ptr(
+/*=====================*/
+ dulint roll_ptr, /* in: roll pointer */
+ ibool* is_insert, /* out: TRUE if insert undo log */
+ ulint* rseg_id, /* out: rollback segment id */
+ ulint* page_no, /* out: page number */
+ ulint* offset) /* out: offset of the undo entry within page */
+{
+ ulint low;
+ ulint high;
+
+ ut_ad(DATA_ROLL_PTR_LEN == 7);
+ ut_ad(TRUE == 1);
+
+ high = ut_dulint_get_high(roll_ptr);
+ low = ut_dulint_get_low(roll_ptr);
+
+ *offset = low % (256 * 256);
+
+ *is_insert = high / (256 * 256 * 128); /* TRUE == 1 */
+ *rseg_id = (high / (256 * 256)) % 128;
+
+ *page_no = (high % (256 * 256)) * 256 * 256
+ + (low / 256) / 256;
+}
+
+/***************************************************************************
+Returns TRUE if the roll pointer is of the insert type. */
+UNIV_INLINE
+ibool
+trx_undo_roll_ptr_is_insert(
+/*========================*/
+ /* out: TRUE if insert undo log */
+ dulint roll_ptr) /* in: roll pointer */
+{
+ ulint high;
+
+ ut_ad(DATA_ROLL_PTR_LEN == 7);
+ ut_ad(TRUE == 1);
+
+ high = ut_dulint_get_high(roll_ptr);
+
+ return(high / (256 * 256 * 128));
+}
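+
+/***************************************************************************
+Illustrative sketch only (not referenced elsewhere): building a roll pointer
+and decoding it again recovers the original fields. The numeric values are
+arbitrary examples chosen to fit the field widths (rseg_id < 128,
+offset < 256 * 256). */
+UNIV_INLINE
+void
+trx_undo_roll_ptr_example(void)
+/*==========================*/
+{
+	dulint	roll_ptr;
+	ibool	is_insert;
+	ulint	rseg_id;
+	ulint	page_no;
+	ulint	offset;
+
+	roll_ptr = trx_undo_build_roll_ptr(TRUE, 3, 70000, 120);
+
+	ut_ad(trx_undo_roll_ptr_is_insert(roll_ptr));
+
+	trx_undo_decode_roll_ptr(roll_ptr, &is_insert, &rseg_id,
+						&page_no, &offset);
+	ut_ad(is_insert == TRUE);
+	ut_ad(rseg_id == 3);
+	ut_ad(page_no == 70000);
+	ut_ad(offset == 120);
+}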
+
+/*********************************************************************
+Writes a roll ptr to an index page. In case that the size changes in
+some future version, this function should be used instead of
+mach_write_... */
+UNIV_INLINE
+void
+trx_write_roll_ptr(
+/*===============*/
+ byte* ptr, /* in: pointer to memory where written */
+ dulint roll_ptr) /* in: roll ptr */
+{
+ ut_ad(DATA_ROLL_PTR_LEN == 7);
+
+ mach_write_to_7(ptr, roll_ptr);
+}
+
+/*********************************************************************
+Reads a roll ptr from an index page. In case that the roll ptr size
+changes in some future version, this function should be used instead of
+mach_read_... */
+UNIV_INLINE
+dulint
+trx_read_roll_ptr(
+/*==============*/
+ /* out: roll ptr */
+ byte* ptr) /* in: pointer to memory from where to read */
+{
+ ut_ad(DATA_ROLL_PTR_LEN == 7);
+
+ return(mach_read_from_7(ptr));
+}
+
+/**********************************************************************
+Gets an undo log page and x-latches it. */
+UNIV_INLINE
+page_t*
+trx_undo_page_get(
+/*===============*/
+ /* out: pointer to page x-latched */
+ ulint space, /* in: space where placed */
+ ulint page_no, /* in: page number */
+ mtr_t* mtr) /* in: mtr */
+{
+ page_t* page;
+
+ page = buf_page_get(space, page_no, RW_X_LATCH, mtr);
+
+ buf_page_dbg_add_level(page, SYNC_TRX_UNDO_PAGE);
+
+ return(page);
+}
+
+/**********************************************************************
+Gets an undo log page and s-latches it. */
+UNIV_INLINE
+page_t*
+trx_undo_page_get_s_latched(
+/*=========================*/
+ /* out: pointer to page s-latched */
+ ulint space, /* in: space where placed */
+ ulint page_no, /* in: page number */
+ mtr_t* mtr) /* in: mtr */
+{
+ page_t* page;
+
+ page = buf_page_get(space, page_no, RW_S_LATCH, mtr);
+
+ buf_page_dbg_add_level(page, SYNC_TRX_UNDO_PAGE);
+
+ return(page);
+}
+
+/**********************************************************************
+Returns the start offset of the undo log records of the specified undo
+log on the page. */
+UNIV_INLINE
+ulint
+trx_undo_page_get_start(
+/*====================*/
+ /* out: start offset */
+ page_t* undo_page,/* in: undo log page */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset) /* in: undo log header offset on page */
+{
+ ulint start;
+
+ if (page_no == buf_frame_get_page_no(undo_page)) {
+
+ start = mach_read_from_2(offset + undo_page
+ + TRX_UNDO_LOG_START);
+ } else {
+ start = TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE;
+ }
+
+ return(start);
+}
+
+/**********************************************************************
+Returns the end offset of the undo log records of the specified undo
+log on the page. */
+UNIV_INLINE
+ulint
+trx_undo_page_get_end(
+/*==================*/
+ /* out: end offset */
+ page_t* undo_page,/* in: undo log page */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset) /* in: undo log header offset on page */
+{
+ trx_ulogf_t* log_hdr;
+ ulint end;
+
+ if (page_no == buf_frame_get_page_no(undo_page)) {
+
+ log_hdr = undo_page + offset;
+
+ end = mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG);
+
+ if (end == 0) {
+ end = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
+ + TRX_UNDO_PAGE_FREE);
+ }
+ } else {
+ end = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
+ + TRX_UNDO_PAGE_FREE);
+ }
+
+ return(end);
+}
+
+/**********************************************************************
+Returns the previous undo record on the page in the specified log, or
+NULL if none exists. */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_page_get_prev_rec(
+/*=======================*/
+ /* out: pointer to record, NULL if none */
+ trx_undo_rec_t* rec, /* in: undo log record */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset) /* in: undo log header offset on page */
+{
+ page_t* undo_page;
+ ulint start;
+
+ undo_page = buf_frame_align(rec);
+
+ start = trx_undo_page_get_start(undo_page, page_no, offset);
+
+ if (start + undo_page == rec) {
+
+ return(NULL);
+ }
+
+ return(undo_page + mach_read_from_2(rec - 2));
+}
+
+/**********************************************************************
+Returns the next undo log record on the page in the specified log, or
+NULL if none exists. */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_page_get_next_rec(
+/*=======================*/
+ /* out: pointer to record, NULL if none */
+ trx_undo_rec_t* rec, /* in: undo log record */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset) /* in: undo log header offset on page */
+{
+ page_t* undo_page;
+ ulint end;
+ ulint next;
+
+ undo_page = buf_frame_align(rec);
+
+ end = trx_undo_page_get_end(undo_page, page_no, offset);
+
+ next = mach_read_from_2(rec);
+
+ if (next == end) {
+
+ return(NULL);
+ }
+
+ return(undo_page + next);
+}
+
+/**********************************************************************
+Returns the last undo record on the page in the specified undo log, or
+NULL if none exists. */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_page_get_last_rec(
+/*=======================*/
+ /* out: pointer to record, NULL if none */
+ page_t* undo_page,/* in: undo log page */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset) /* in: undo log header offset on page */
+{
+ ulint start;
+ ulint end;
+
+ start = trx_undo_page_get_start(undo_page, page_no, offset);
+ end = trx_undo_page_get_end(undo_page, page_no, offset);
+
+ if (start == end) {
+
+ return(NULL);
+ }
+
+ return(undo_page + mach_read_from_2(undo_page + end - 2));
+}
+
+/**********************************************************************
+Returns the first undo record on the page in the specified undo log, or
+NULL if none exists. */
+UNIV_INLINE
+trx_undo_rec_t*
+trx_undo_page_get_first_rec(
+/*========================*/
+ /* out: pointer to record, NULL if none */
+ page_t* undo_page,/* in: undo log page */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset) /* in: undo log header offset on page */
+{
+ ulint start;
+ ulint end;
+
+ start = trx_undo_page_get_start(undo_page, page_no, offset);
+ end = trx_undo_page_get_end(undo_page, page_no, offset);
+
+ if (start == end) {
+
+ return(NULL);
+ }
+
+ return(undo_page + start);
+}
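
Taken together, the getters above let a caller walk the record chain of one undo log on a page. A minimal traversal sketch (illustrative only, using just the inline functions defined above) looks like this:

    /* Visit every record of the undo log that starts at (page_no, offset)
       on an already latched undo page. */
    trx_undo_rec_t* rec;

    rec = trx_undo_page_get_first_rec(undo_page, page_no, offset);

    while (rec != NULL) {
            /* ... process the record ... */

            rec = trx_undo_page_get_next_rec(rec, page_no, offset);
    }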
diff --git a/innobase/include/univ.i b/innobase/include/univ.i
new file mode 100644
index 00000000000..d60c297f3c4
--- /dev/null
+++ b/innobase/include/univ.i
@@ -0,0 +1,166 @@
+/***************************************************************************
+Version control for database, common definitions, and include files
+
+(c) 1994 - 2000 Innobase Oy
+
+Created 1/20/1994 Heikki Tuuri
+****************************************************************************/
+
+#ifndef univ_i
+#define univ_i
+
+#if (defined(_WIN32) || defined(_WIN64))
+#define __WIN__
+#include <windows.h>
+
+
+#else
+/* The Unix version */
+
+/* Include two header files from MySQL to make the Unix flavor used
+in compiling more Posix-compatible. We assume that 'innobase' is a
+subdirectory of 'mysql'. */
+#include <global.h>
+#include <my_pthread.h>
+
+#undef PACKAGE
+#undef VERSION
+
+/* Include the header file generated by GNU autoconf */
+#include "../ib_config.h"
+
+#ifdef HAVE_PREAD
+#define HAVE_PWRITE
+#endif
+
+#endif
+
+/* DEBUG VERSION CONTROL
+ ===================== */
+/* Make a non-inline debug version */
+/*
+#define UNIV_DEBUG
+#define UNIV_MEM_DEBUG
+#define UNIV_SYNC_DEBUG
+#define UNIV_SEARCH_DEBUG
+
+#define UNIV_IBUF_DEBUG
+
+#define UNIV_SYNC_PERF_STAT
+#define UNIV_SEARCH_PERF_STAT
+*/
+#define UNIV_LIGHT_MEM_DEBUG
+
+#define YYDEBUG 1
+
+/*
+#define UNIV_SQL_DEBUG
+#define UNIV_LOG_DEBUG
+*/
+ /* the above option prevents forcing of log to disk
+ at a buffer page write: it should be tested with this
+ option off; also some ibuf tests are suppressed */
+/*
+#define UNIV_BASIC_LOG_DEBUG
+*/
+ /* the above option enables basic recovery debugging:
+ new allocated file pages are reset */
+
+#if (!defined(UNIV_DEBUG) && !defined(INSIDE_HA_INNOBASE_CC))
+/* Definition for inline version */
+
+#ifdef __WIN__
+#define UNIV_INLINE __inline
+#else
+/* config.h contains the right def for 'inline' for the current compiler */
+#define UNIV_INLINE extern inline
+
+#endif
+
+#else
+/* If we want to compile a noninlined version we use the following macro
+definitions: */
+
+#define UNIV_NONINL
+#define UNIV_INLINE
+
+#endif /* UNIV_DEBUG */
+
+#ifdef _WIN32
+#define UNIV_WORD_SIZE 4
+#elif defined(_WIN64)
+#define UNIV_WORD_SIZE 8
+#else
+/* config.h generated by GNU autoconf will define SIZEOF_INT in Posix */
+#define UNIV_WORD_SIZE SIZEOF_INT
+#endif
+
+/* The following alignment is used in memory allocations in memory heap
+management to ensure correct alignment for doubles etc. */
+#define UNIV_MEM_ALIGNMENT 8
+
+/* The following alignment is used in aligning lints etc. */
+#define UNIV_WORD_ALIGNMENT UNIV_WORD_SIZE
+
+/*
+ DATABASE VERSION CONTROL
+ ========================
+*/
+
+/* The universal page size of the database */
+#define UNIV_PAGE_SIZE (2 * 8192) /* NOTE! Currently, this has to be a
+ power of 2 */
+/* The base-2 logarithm of UNIV_PAGE_SIZE: */
+#define UNIV_PAGE_SIZE_SHIFT 14
+
+/* Maximum number of parallel threads in a parallelized operation */
+#define UNIV_MAX_PARALLELISM 32
+
+/*
+ UNIVERSAL TYPE DEFINITIONS
+ ==========================
+*/
+
+/* Note that inside MySQL 'byte' is defined as char on Linux! */
+#define byte unsigned char
+
+/* Another basic type we use is unsigned long integer which is intended to be
+equal to the word size of the machine. */
+
+typedef unsigned long int ulint;
+
+typedef long int lint;
+
+/* The following type should be at least a 64-bit floating point number */
+typedef double utfloat;
+
+/* The 'undefined' value for a ulint */
+#define ULINT_UNDEFINED ((ulint)(-1))
+
+/* The undefined 32-bit unsigned integer */
+#define ULINT32_UNDEFINED 0xFFFFFFFF
+
+/* Maximum value for a ulint */
+#define ULINT_MAX ((ulint)(-2))
+
+/* This 'ibool' type is used within Innobase. Remember that different included
+headers may define 'bool' differently. Do not assume that 'bool' is a ulint! */
+#define ibool ulint
+
+#ifndef TRUE
+
+#define TRUE 1
+#define FALSE 0
+
+#endif
+
+/* The following number as the length of a logical field means that the field
+has the SQL NULL as its value. */
+#define UNIV_SQL_NULL ULINT_UNDEFINED
+
+#include <stdio.h>
+#include "ut0dbg.h"
+#include "ut0ut.h"
+#include "db0err.h"
+
+#endif
diff --git a/innobase/include/univold.i b/innobase/include/univold.i
new file mode 100644
index 00000000000..8bcd28e180f
--- /dev/null
+++ b/innobase/include/univold.i
@@ -0,0 +1,164 @@
+/***************************************************************************
+Version control for database, common definitions, and include files
+
+(c) 1994 - 2000 Innobase Oy
+
+Created 1/20/1994 Heikki Tuuri
+****************************************************************************/
+
+#ifndef univ_i
+#define univ_i
+
+#define UNIV_INTEL
+#define UNIV_PENTIUM
+/* If UNIV_WINNT is not defined, we assume Windows 95 */
+
+#define UNIV_WINNT
+#define UNIV_WINNT4
+#define __NT__
+
+#define UNIV_VISUALC
+
+#define __WIN__
+#define _WIN32_WINNT 0x0400
+
+/* DEBUG VERSION CONTROL
+ ===================== */
+/* Make a non-inline debug version */
+/*
+#define UNIV_DEBUG
+#define UNIV_MEM_DEBUG
+#define UNIV_SYNC_DEBUG
+#define UNIV_SEARCH_DEBUG
+
+#define UNIV_IBUF_DEBUG
+
+#define UNIV_SEARCH_PERF_STAT
+#define UNIV_SYNC_PERF_STAT
+*/
+#define UNIV_LIGHT_MEM_DEBUG
+
+#define YYDEBUG 1
+/*
+#define UNIV_SQL_DEBUG
+#define UNIV_LOG_DEBUG
+*/
+ /* the above option prevents forcing of log to disk
+ at a buffer page write: it should be tested with this
+ option off; also some ibuf tests are suppressed */
+/*
+#define UNIV_BASIC_LOG_DEBUG
+*/
+ /* the above option enables basic recovery debugging:
+ new allocated file pages are reset */
+
+/* The debug version is slower, thus we may change the length of test loops
+depending on the UNIV_DBC parameter */
+#ifdef UNIV_DEBUG
+#define UNIV_DBC 1
+#else
+#define UNIV_DBC 100
+#endif
+
+#ifndef UNIV_DEBUG
+/* Definition for inline version */
+
+#ifdef UNIV_VISUALC
+#define UNIV_INLINE __inline
+#elif defined(UNIV_GNUC)
+#define UNIV_INLINE extern __inline__
+#endif
+
+#else
+/* If we want to compile a noninlined version we use the following macro
+definitions: */
+
+#define UNIV_NONINL
+#define UNIV_INLINE
+
+#endif /* UNIV_DEBUG */
+/* If the compiler does not know inline specifier, we use: */
+/*
+#define UNIV_INLINE static
+*/
+
+
+/*
+ MACHINE VERSION CONTROL
+ =======================
+*/
+
+#ifdef UNIV_PENTIUM
+
+/* In a 32-bit computer word size is 4 */
+#define UNIV_WORD_SIZE 4
+
+/* The following alignment is used in memory allocations in memory heap
+management to ensure correct alignment for doubles etc. */
+#define UNIV_MEM_ALIGNMENT 8
+
+/* The following alignment is used in aligning lints etc. */
+#define UNIV_WORD_ALIGNMENT UNIV_WORD_SIZE
+
+#endif
+
+/*
+ DATABASE VERSION CONTROL
+ ========================
+*/
+
+/* The universal page size of the database */
+#define UNIV_PAGE_SIZE (2 * 8192)/* NOTE! Currently, this has to be a
+ power of 2 and divisible by
+ UNIV_MEM_ALIGNMENT */
+
+/* Do non-buffered io in buffer pool read/write operations */
+#define UNIV_NON_BUFFERED_IO
+
+/* Maximum number of parallel threads in a parallelized operation */
+#define UNIV_MAX_PARALLELISM 32
+
+/*
+ UNIVERSAL TYPE DEFINITIONS
+ ==========================
+*/
+
+
+typedef unsigned char byte;
+
+/* Another basic type we use is unsigned long integer which is intended to be
+equal to the word size of the machine. */
+
+typedef unsigned long int ulint;
+
+typedef long int lint;
+
+/* The following type should be at least a 64-bit floating point number */
+typedef double utfloat;
+
+/* The 'undefined' value for a ulint */
+#define ULINT_UNDEFINED ((ulint)(-1))
+
+/* The undefined 32-bit unsigned integer */
+#define ULINT32_UNDEFINED 0xFFFFFFFF
+
+/* Maximum value for a ulint */
+#define ULINT_MAX ((ulint)(-2))
+
+
+/* Definition of the boolean type */
+typedef ulint bool;
+
+#define TRUE 1
+#define FALSE 0
+
+/* The following number as the length of a logical field means that the field
+has the SQL NULL as its value. */
+#define UNIV_SQL_NULL ULINT_UNDEFINED
+
+#include <stdio.h>
+#include "ut0dbg.h"
+#include "ut0ut.h"
+#include "db0err.h"
+
+#endif
diff --git a/innobase/include/univoldmysql.i b/innobase/include/univoldmysql.i
new file mode 100644
index 00000000000..269b584d073
--- /dev/null
+++ b/innobase/include/univoldmysql.i
@@ -0,0 +1,181 @@
+/***************************************************************************
+Version control for database, common definitions, and include files
+
+(c) 1994 - 1996 Innobase Oy
+
+Created 1/20/1994 Heikki Tuuri
+****************************************************************************/
+
+#ifndef univ_i
+#define univ_i
+
+#define UNIV_INTEL
+#define UNIV_PENTIUM
+/* If UNIV_WINNT is not defined, we assume Windows 95 */
+
+#define UNIV_WINNT
+#define UNIV_WINNT4
+
+#define UNIV_VISUALC
+
+/* DEBUG VERSION CONTROL
+ ===================== */
+/* Make a profiler version where mutex_fence does not use CPUID and therefore
+is not totally safe. The sync-library must be recompiled before profiling. */
+/*
+#define UNIV_PROFILE
+*/
+/* When the following flag is defined, also mutex lock word reset to 0
+in mutex_exit is performed using a serializing instruction, which does not
+allow speculative reads be performed before memory writes */
+/*
+#define SYNC_SERIALIZE_MUTEX_RESET
+*/
+/* Make a non-inline debug version */
+
+#define UNIV_DEBUG
+#define UNIV_MEM_DEBUG
+#define UNIV_SYNC_DEBUG
+#define UNIV_SEARCH_DEBUG
+
+#define UNIV_IBUF_DEBUG
+
+#define UNIV_SEARCH_PERF_STAT
+#define UNIV_SYNC_PERF_STAT
+
+
+#define UNIV_LIGHT_MEM_DEBUG
+
+#define YYDEBUG 1
+/*
+#define UNIV_SQL_DEBUG
+#define UNIV_LOG_DEBUG
+*/
+ /* the above option prevents forcing of log to disk
+ at a buffer page write: it should be tested with this
+ option off; also some ibuf tests are suppressed */
+/*
+#define UNIV_BASIC_LOG_DEBUG
+*/
+ /* the above option enables basic recovery debugging:
+ new allocated file pages are reset */
+
+/* The debug version is slower, thus we may change the length of test loops
+depending on the UNIV_DBC parameter */
+#ifdef UNIV_DEBUG
+#define UNIV_DBC 1
+#else
+#define UNIV_DBC 100
+#endif
+
+#ifndef UNIV_DEBUG
+/* Definition for inline version */
+
+#ifdef UNIV_VISUALC
+#define UNIV_INLINE __inline
+#elif defined(UNIV_GNUC)
+#define UNIV_INLINE extern __inline__
+#endif
+
+#else
+/* If we want to compile a noninlined version we use the following macro
+definitions: */
+
+#define UNIV_NONINL
+#define UNIV_INLINE
+
+#endif /* UNIV_DEBUG */
+/* If the compiler does not know inline specifier, we use: */
+/*
+#define UNIV_INLINE static
+*/
+
+
+/*
+ MACHINE VERSION CONTROL
+ =======================
+*/
+
+#ifdef UNIV_PENTIUM
+
+/* In a 32-bit computer word size is 4 */
+#define UNIV_WORD_SIZE 4
+
+/* The following alignment is used in memory allocations in memory heap
+management to ensure correct alignment for doubles etc. */
+#define UNIV_MEM_ALIGNMENT 8
+
+/* The following alignment is used in aligning lints etc. */
+#define UNIV_WORD_ALIGNMENT UNIV_WORD_SIZE
+
+#endif
+
+/*
+ DATABASE VERSION CONTROL
+ ========================
+*/
+
+/* The universal page size of the database */
+#define UNIV_PAGE_SIZE 8192 /* NOTE! Currently, this has to be a
+ power of 2 and divisible by
+ UNIV_MEM_ALIGNMENT */
+/* The base-2 logarithm of UNIV_PAGE_SIZE */
+#define UNIV_PAGE_SIZE_SHIFT 13
+
+/* Do asynchronous io in buffer pool read/write operations */
+#ifdef UNIV_WINNT
+#define UNIV_ASYNC_IO
+#endif
+
+/* Do non-buffered io in buffer pool read/write operations */
+#define UNIV_NON_BUFFERED_IO
+
+/* Maximum number of parallel threads in a parallelized operation */
+#define UNIV_MAX_PARALLELISM 32
+
+/*
+ UNIVERSAL TYPE DEFINITIONS
+ ==========================
+*/
+
+/*
+typedef unsigned char byte;
+*/
+
+/* Another basic type we use is unsigned long integer which is intended to be
+equal to the word size of the machine. */
+
+typedef unsigned long int ulint;
+
+typedef long int lint;
+
+/* The following type should be at least a 64-bit floating point number */
+typedef double utfloat;
+
+/* The 'undefined' value for a ulint */
+#define ULINT_UNDEFINED ((ulint)(-1))
+
+/* The undefined 32-bit unsigned integer */
+#define ULINT32_UNDEFINED 0xFFFFFFFF
+
+/* Maximum value for a ulint */
+#define ULINT_MAX ((ulint)(-2))
+
+/* Definition of the boolean type */
+#ifndef bool
+typedef ulint bool;
+#endif
+
+#define TRUE 1
+#define FALSE 0
+
+/* The following number as the length of a logical field means that the field
+has the SQL NULL as its value. */
+#define UNIV_SQL_NULL ULINT_UNDEFINED
+
+#include <stdio.h>
+#include "ut0dbg.h"
+#include "ut0ut.h"
+#include "db0err.h"
+
+#endif
diff --git a/innobase/include/usr0sess.h b/innobase/include/usr0sess.h
new file mode 100644
index 00000000000..365f828ecfc
--- /dev/null
+++ b/innobase/include/usr0sess.h
@@ -0,0 +1,318 @@
+/******************************************************
+Sessions
+
+(c) 1996 Innobase Oy
+
+Created 6/25/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef usr0sess_h
+#define usr0sess_h
+
+#include "univ.i"
+#include "ut0byte.h"
+#include "hash0hash.h"
+#include "trx0types.h"
+#include "srv0srv.h"
+#include "trx0types.h"
+#include "usr0types.h"
+#include "que0types.h"
+#include "data0data.h"
+#include "rem0rec.h"
+#include "com0com.h"
+
+/* The session system global data structure */
+extern sess_sys_t* sess_sys;
+
+/*************************************************************************
+Sets the session id in a client message. */
+
+void
+sess_cli_msg_set_sess(
+/*==================*/
+ byte* str, /* in/out: message string */
+ dulint sess_id);/* in: session id */
+/***************************************************************************
+Sets the message type of a message from the client. */
+UNIV_INLINE
+void
+sess_cli_msg_set_type(
+/*==================*/
+ byte* str, /* in: message string */
+ ulint type); /* in: message type */
+/***************************************************************************
+Gets the message type of a message from the server. */
+UNIV_INLINE
+ulint
+sess_srv_msg_get_type(
+/*==================*/
+ /* out: message type */
+ byte* str); /* in: message string */
+/***************************************************************************
+Creates a session system at database start. */
+
+void
+sess_sys_init_at_db_start(void);
+/*===========================*/
+/*************************************************************************
+Opens a session. */
+
+sess_t*
+sess_open(
+/*======*/
+ /* out, own: session object */
+ com_endpoint_t* endpoint, /* in: communication endpoint used
+ for communicating with the client */
+ byte* addr_buf, /* in: client address */
+ ulint addr_len); /* in: client address length */
+/*************************************************************************
+Closes a session, freeing the memory occupied by it. */
+
+void
+sess_close(
+/*=======*/
+ sess_t* sess); /* in, own: session object */
+/*************************************************************************
+Raises an SQL error. */
+
+void
+sess_raise_error_low(
+/*=================*/
+ trx_t* trx, /* in: transaction */
+ ulint err_no, /* in: error number */
+ ulint type, /* in: more info of the error, or 0 */
+ dict_table_t* table, /* in: dictionary table or NULL */
+ dict_index_t* index, /* in: table index or NULL */
+ dtuple_t* tuple, /* in: tuple to insert or NULL */
+ rec_t* rec, /* in: record or NULL */
+ char* err_str);/* in: arbitrary null-terminated error string,
+ or NULL */
+/*************************************************************************
+Closes a session, freeing the memory occupied by it, if it is in a state
+where it should be closed. */
+
+ibool
+sess_try_close(
+/*===========*/
+ /* out: TRUE if closed */
+ sess_t* sess); /* in, own: session object */
+/*************************************************************************
+Initializes the first fields of a message to client. */
+
+void
+sess_srv_msg_init(
+/*==============*/
+ sess_t* sess, /* in: session object */
+ byte* buf, /* in: message buffer, must be at least of size
+ SESS_SRV_MSG_DATA */
+ ulint type); /* in: message type */
+/*************************************************************************
+Sends a simple message to client. */
+
+void
+sess_srv_msg_send_simple(
+/*=====================*/
+ sess_t* sess, /* in: session object */
+ ulint type, /* in: message type */
+ ulint rel_kernel); /* in: SESS_RELEASE_KERNEL or
+ SESS_NOT_RELEASE_KERNEL */
+/***************************************************************************
+Processes a message from a client. NOTE: May release the kernel mutex
+temporarily. */
+
+void
+sess_receive_msg_rel_kernel(
+/*========================*/
+ sess_t* sess, /* in: session */
+ byte* str, /* in: message string */
+ ulint len); /* in: message length */
+/***************************************************************************
+When a command has been completed, this function sends the message about it
+to the client. */
+
+void
+sess_command_completed_message(
+/*===========================*/
+ sess_t* sess, /* in: session */
+ byte* msg, /* in: message buffer */
+ ulint len); /* in: message data length */
+/***********************************************************************
+Starts a new connection and a session, or starts a query based on a client
+message. This is called by a SRV_COM thread. */
+
+void
+sess_process_cli_msg(
+/*=================*/
+ byte* str, /* in: message string */
+ ulint len, /* in: string length */
+ byte* addr, /* in: address string */
+ ulint alen); /* in: address length */
+
+
+/* The session handle. All fields are protected by the kernel mutex */
+struct sess_struct{
+ dulint id; /* session id */
+ dulint usr_id; /* user id */
+ hash_node_t hash; /* hash chain node */
+ ulint refer_count; /* reference count to the session
+ object: when this drops to zero
+ and the session has no query graphs
+ left, discarding the session object
+ is allowed */
+ dulint error_count; /* if this counter has increased while
+ a thread is parsing an SQL command,
+ its graph should be discarded */
+ ibool disconnecting; /* TRUE if the session is to be
+ disconnected when its reference
+ count drops to 0 */
+ ulint state; /* state of the session */
+ dulint msgs_sent; /* count of messages sent to the
+ client */
+ dulint msgs_recv; /* count of messages received from the
+ client */
+ ibool client_waits; /* when the session receives a message
+ from the client, this is set to TRUE, and
+ when the session sends a message to
+ the client, this is set to FALSE */
+ trx_t* trx; /* transaction object permanently
+ assigned for the session: the
+ transaction instance designated by the
+ trx id changes, but the memory
+ structure is preserved */
+ ulint next_graph_id; /* next query graph id to assign */
+ UT_LIST_BASE_NODE_T(que_t)
+ graphs; /* query graphs belonging to this
+ session */
+ /*------------------------------*/
+ ulint err_no; /* latest error number, 0 if none */
+ char* err_str; /* latest error string */
+ ulint err_len; /* error string length */
+ /*------------------------------*/
+ com_endpoint_t* endpoint; /* server communications endpoint used
+ to communicate with the client */
+ char* addr_buf; /* client address string */
+ ulint addr_len; /* client address string length */
+ /*------------------------------*/
+ byte* big_msg; /* if the client sends a message which
+ does not fit in a single packet,
+ it is assembled in this buffer; if
+ this field is not NULL, it is assumed
+ that the message should be catenated
+ here */
+ ulint big_msg_size; /* size of the big message buffer */
+ ulint big_msg_len; /* length of data in the big message
+ buffer */
+};
+
+/* The session system; this is protected by the kernel mutex */
+struct sess_sys_struct{
+ ulint state; /* state of the system:
+ SESS_SYS_RUNNING or
+ SESS_SYS_SHUTTING_DOWN */
+ sess_t* shutdown_req; /* if shutdown was requested by some
+ session, confirmation of shutdown
+ completion should be sent to this
+ session */
+ dulint free_sess_id; /* first unused session id */
+ hash_table_t* hash; /* hash table of the sessions */
+};
+
+
+/*---------------------------------------------------*/
+/* The format of an incoming message from a client */
+#define SESS_CLI_MSG_CHECKSUM 0 /* the checksum should be the first
+ field in the message */
+#define SESS_CLI_MSG_SESS_ID 4 /* this is set to 0 if the client
+ wants to connect and establish
+ a new session */
+#define SESS_CLI_MSG_SESS_ID_CHECK 12 /* checksum of the sess id field */
+#define SESS_CLI_MSG_TYPE 16
+#define SESS_CLI_MSG_NO 20
+#define SESS_CLI_MSG_CONTINUE 28 /* 0, or SESS_MSG_FIRST_PART,
+ SESS_MSG_MIDDLE_PART, or
+ SESS_MSG_LAST_PART */
+#define SESS_CLI_MSG_CONT_SIZE 32 /* size of a multipart message in
+ kilobytes (rounded upwards) */
+#define SESS_CLI_MSG_DATA 36
+/*---------------------------------------------------*/
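
As a small illustration of the layout above (not part of the patch), the fixed header fields of a received client message could be read with the mach_ utilities; 'msg' is assumed to point to a complete received message buffer.

    /* Sketch: read two header fields of an incoming client message. */
    ulint   type = mach_read_from_4(msg + SESS_CLI_MSG_TYPE);
    ulint   cont = mach_read_from_4(msg + SESS_CLI_MSG_CONTINUE);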
+
+/* Client-to-session message types */
+#define SESS_CLI_CONNECT 1
+#define SESS_CLI_PREPARE 2
+#define SESS_CLI_EXECUTE 3
+#define SESS_CLI_BREAK_EXECUTION 4
+
+/* Client-to-session statement command types */
+#define SESS_COMM_FETCH_NEXT 1
+#define SESS_COMM_FETCH_PREV 2
+#define SESS_COMM_FETCH_FIRST 3
+#define SESS_COMM_FETCH_LAST 4
+#define SESS_COMM_FETCH_NTH 5
+#define SESS_COMM_FETCH_NTH_LAST 6
+#define SESS_COMM_EXECUTE 7
+#define SESS_COMM_NO_COMMAND 8
+
+/*---------------------------------------------------*/
+/* The format of an outgoing message from a session to the client */
+#define SESS_SRV_MSG_CHECKSUM 0 /* the checksum should be the first
+ field in the message */
+#define SESS_SRV_MSG_SESS_ID 4
+#define SESS_SRV_MSG_TYPE 12
+#define SESS_SRV_MSG_NO 16
+#define SESS_SRV_MSG_CONTINUE 24 /* 0, or SESS_MSG_FIRST_PART,
+ SESS_MSG_MIDDLE_PART, or
+ SESS_MSG_LAST_PART */
+#define SESS_SRV_MSG_CONT_SIZE 28 /* size of a multipart message
+ in kilobytes (rounded upward) */
+#define SESS_SRV_MSG_DATA 32
+/*---------------------------------------------------*/
+
+/* Session-to-client message types */
+#define SESS_SRV_ACCEPT_CONNECT 1
+#define SESS_SRV_SUCCESS 2
+#define SESS_SRV_ERROR 3
+
+/* Multipart messages */
+#define SESS_MSG_SINGLE_PART 0
+#define SESS_MSG_FIRST_PART 1
+#define SESS_MSG_MIDDLE_PART 2
+#define SESS_MSG_LAST_PART 3
+
+/* Error numbers */
+#define SESS_ERR_NONE 0
+#define SESS_ERR_TRX_COMMITTED 1
+#define SESS_ERR_TRX_ROLLED_BACK 2
+#define SESS_ERR_SESSION_DISCONNECTED 3
+#define SESS_ERR_REPLY_FAILED 4
+#define SESS_ERR_CANNOT_BREAK_OP 5
+#define SESS_ERR_MSG_LOST 6
+#define SESS_ERR_MSG_CORRUPTED 7
+#define SESS_ERR_EXTRANEOUS_MSG 8
+#define SESS_ERR_OUT_OF_MEMORY 9
+#define SESS_ERR_SQL_ERROR 10
+#define SESS_ERR_STMT_NOT_FOUND 11
+#define SESS_ERR_STMT_NOT_READY 12
+#define SESS_ERR_EXTRANEOUS_SRV_MSG 13
+#define SESS_ERR_BREAK_BY_CLIENT 14
+
+/* Session states */
+#define SESS_ACTIVE 1
+#define SESS_ERROR 2 /* session contains an error message
+ which has not yet been communicated
+ to the client */
+/* Session system states */
+#define SESS_SYS_RUNNING 1
+#define SESS_SYS_SHUTTING_DOWN 2
+
+/* Session hash table size */
+#define SESS_HASH_SIZE 1024
+
+/* Flags used in sess_srv_msg_send */
+#define SESS_RELEASE_KERNEL 1
+#define SESS_NOT_RELEASE_KERNEL 2
+
+#ifndef UNIV_NONINL
+#include "usr0sess.ic"
+#endif
+
+#endif
diff --git a/innobase/include/usr0sess.ic b/innobase/include/usr0sess.ic
new file mode 100644
index 00000000000..ee2592c7963
--- /dev/null
+++ b/innobase/include/usr0sess.ic
@@ -0,0 +1,31 @@
+/******************************************************
+Sessions
+
+(c) 1996 Innobase Oy
+
+Created 6/25/1996 Heikki Tuuri
+*******************************************************/
+
+/***************************************************************************
+Sets the message type of a message from the client. */
+UNIV_INLINE
+void
+sess_cli_msg_set_type(
+/*==================*/
+ byte* str, /* in: message string */
+ ulint type) /* in: message type */
+{
+ mach_write_to_4(str + SESS_CLI_MSG_TYPE, type);
+}
+
+/***************************************************************************
+Gets the message type of a message from the server. */
+UNIV_INLINE
+ulint
+sess_srv_msg_get_type(
+/*==================*/
+ /* out: message type */
+ byte* str) /* in: message string */
+{
+ return(mach_read_from_4(str + SESS_SRV_MSG_TYPE));
+}
diff --git a/innobase/include/usr0types.h b/innobase/include/usr0types.h
new file mode 100644
index 00000000000..67070ccce27
--- /dev/null
+++ b/innobase/include/usr0types.h
@@ -0,0 +1,16 @@
+/******************************************************
+Users and sessions global types
+
+(c) 1996 Innobase Oy
+
+Created 6/25/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef usr0types_h
+#define usr0types_h
+
+typedef struct sess_struct sess_t;
+typedef struct sess_sys_struct sess_sys_t;
+typedef struct sess_sig_struct sess_sig_t;
+
+#endif
diff --git a/innobase/include/ut0byte.h b/innobase/include/ut0byte.h
new file mode 100644
index 00000000000..77795ee0708
--- /dev/null
+++ b/innobase/include/ut0byte.h
@@ -0,0 +1,229 @@
+/**********************************************************************
+Utilities for byte operations
+
+(c) 1994, 1995 Innobase Oy
+
+Created 1/20/1994 Heikki Tuuri
+***********************************************************************/
+
+#ifndef ut0byte_h
+#define ut0byte_h
+
+
+#include "univ.i"
+
+/* Type definition for a 64-bit unsigned integer, which also works
+on 32-bit machines. NOTE! Access the fields only with the accessor
+functions. This definition appears here only for the compiler to
+know the size of a dulint. */
+
+typedef struct dulint_struct dulint;
+struct dulint_struct{
+ ulint high; /* most significant 32 bits */
+ ulint low; /* least significant 32 bits */
+};
+
+/* Zero value for a dulint */
+extern dulint ut_dulint_zero;
+
+/* Maximum value for a dulint */
+extern dulint ut_dulint_max;
+
+/***********************************************************
+Creates a 64-bit dulint out of two ulints. */
+UNIV_INLINE
+dulint
+ut_dulint_create(
+/*=============*/
+ /* out: created dulint */
+ ulint high, /* in: high-order 32 bits */
+ ulint low); /* in: low-order 32 bits */
+/***********************************************************
+Gets the high-order 32 bits of a dulint. */
+UNIV_INLINE
+ulint
+ut_dulint_get_high(
+/*===============*/
+ /* out: 32 bits in ulint */
+ dulint d); /* in: dulint */
+/***********************************************************
+Gets the low-order 32 bits of a dulint. */
+UNIV_INLINE
+ulint
+ut_dulint_get_low(
+/*==============*/
+ /* out: 32 bits in ulint */
+ dulint d); /* in: dulint */
+/***********************************************************
+Tests if a dulint is zero. */
+UNIV_INLINE
+ibool
+ut_dulint_is_zero(
+/*==============*/
+ /* out: TRUE if zero */
+ dulint a); /* in: dulint */
+/***********************************************************
+Compares two dulints. */
+UNIV_INLINE
+int
+ut_dulint_cmp(
+/*==========*/
+ /* out: -1 if a < b, 0 if a == b,
+ 1 if a > b */
+ dulint a, /* in: dulint */
+ dulint b); /* in: dulint */
+/***********************************************************
+Calculates the max of two dulints. */
+UNIV_INLINE
+dulint
+ut_dulint_get_max(
+/*==============*/
+ /* out: max(a, b) */
+ dulint a, /* in: dulint */
+ dulint b); /* in: dulint */
+/***********************************************************
+Calculates the min of two dulints. */
+UNIV_INLINE
+dulint
+ut_dulint_get_min(
+/*==============*/
+ /* out: min(a, b) */
+ dulint a, /* in: dulint */
+ dulint b); /* in: dulint */
+/***********************************************************
+Adds a ulint to a dulint. */
+UNIV_INLINE
+dulint
+ut_dulint_add(
+/*==========*/
+ /* out: sum a + b */
+ dulint a, /* in: dulint */
+ ulint b); /* in: ulint */
+/***********************************************************
+Subtracts a ulint from a dulint. */
+UNIV_INLINE
+dulint
+ut_dulint_subtract(
+/*===============*/
+ /* out: a - b */
+ dulint a, /* in: dulint */
+ ulint b); /* in: ulint, b <= a */
+/***********************************************************
+Subtracts a dulint from another. NOTE that the difference must be positive
+and smaller than 4G. */
+UNIV_INLINE
+ulint
+ut_dulint_minus(
+/*============*/
+ /* out: a - b */
+ dulint a, /* in: dulint; NOTE a must be >= b and at most
+ 2 to power 32 - 1 greater */
+ dulint b); /* in: dulint */
+/************************************************************
+Rounds a dulint downward to a multiple of a power of 2. */
+UNIV_INLINE
+dulint
+ut_dulint_align_down(
+/*=================*/
+ /* out: rounded value */
+ dulint n, /* in: number to be rounded */
+ ulint align_no); /* in: align by this number which must be a
+ power of 2 */
+/************************************************************
+Rounds a dulint upward to a multiple of a power of 2. */
+UNIV_INLINE
+dulint
+ut_dulint_align_up(
+/*===============*/
+ /* out: rounded value */
+ dulint n, /* in: number to be rounded */
+ ulint align_no); /* in: align by this number which must be a
+ power of 2 */
+/***********************************************************
+Increments a dulint variable by 1. */
+#define UT_DULINT_INC(D)\
+{\
+ if ((D).low == 0xFFFFFFFF) {\
+ (D).high = (D).high + 1;\
+ (D).low = 0;\
+ } else {\
+ (D).low = (D).low + 1;\
+ }\
+}
+/***********************************************************
+Tests if two dulints are equal. */
+#define UT_DULINT_EQ(D1, D2) (((D1).low == (D2).low)\
+ && ((D1).high == (D2).high))
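
A short usage sketch (illustrative only, using the accessors and macros declared above) shows a dulint crossing the 32-bit boundary:

    dulint  n;

    n = ut_dulint_create(0, 0xFFFFFFFF);   /* n == 2^32 - 1 */
    UT_DULINT_INC(n);                      /* carry: high becomes 1, low 0 */

    ut_a(ut_dulint_get_high(n) == 1);
    ut_a(ut_dulint_get_low(n) == 0);
    ut_a(ut_dulint_cmp(n, ut_dulint_create(1, 0)) == 0);
    ut_a(UT_DULINT_EQ(n, ut_dulint_create(1, 0)));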
+/****************************************************************
+Sort function for dulint arrays. */
+void
+ut_dulint_sort(dulint* arr, dulint* aux_arr, ulint low, ulint high);
+/*===============================================================*/
+/************************************************************
+The following function calculates the value of an integer n rounded
+to the smallest multiple of align_no which is >= n. align_no has to be a
+power of 2. */
+UNIV_INLINE
+ulint
+ut_calc_align(
+/*==========*/
+ /* out: rounded value */
+ ulint n, /* in: number to be rounded */
+ ulint align_no); /* in: align by this number */
+/************************************************************
+The following function calculates the value of an integer n rounded
+to the largest multiple of align_no which is <= n. align_no has to be a
+power of 2. */
+UNIV_INLINE
+ulint
+ut_calc_align_down(
+/*===============*/
+ /* out: rounded value */
+ ulint n, /* in: number to be rounded */
+ ulint align_no); /* in: align by this number */
+/*************************************************************
+The following function rounds up a pointer to the nearest aligned address. */
+UNIV_INLINE
+void*
+ut_align(
+/*=====*/
+ /* out: aligned pointer */
+ void* ptr, /* in: pointer */
+ ulint align_no); /* in: align by this number */
+/*************************************************************
+The following function rounds down a pointer to the nearest
+aligned address. */
+UNIV_INLINE
+void*
+ut_align_down(
+/*==========*/
+ /* out: aligned pointer */
+ void* ptr, /* in: pointer */
+ ulint align_no); /* in: align by this number */
+/*********************************************************************
+Gets the nth bit of a ulint. */
+UNIV_INLINE
+ibool
+ut_bit_get_nth(
+/*===========*/
+ /* out: TRUE if nth bit is 1; 0th bit is defined to
+ be the least significant */
+ ulint a, /* in: ulint */
+ ulint n); /* in: nth bit requested */
+/*********************************************************************
+Sets the nth bit of a ulint. */
+UNIV_INLINE
+ulint
+ut_bit_set_nth(
+/*===========*/
+ /* out: the ulint with the bit set as requested */
+ ulint a, /* in: ulint */
+ ulint n, /* in: nth bit requested */
+ ibool val); /* in: value for the bit to set */
+
+
+#ifndef UNIV_NONINL
+#include "ut0byte.ic"
+#endif
+
+#endif
diff --git a/innobase/include/ut0byte.ic b/innobase/include/ut0byte.ic
new file mode 100644
index 00000000000..b8170392c8f
--- /dev/null
+++ b/innobase/include/ut0byte.ic
@@ -0,0 +1,360 @@
+/******************************************************************
+Utilities for byte operations
+
+(c) 1994, 1995 Innobase Oy
+
+Created 5/30/1994 Heikki Tuuri
+*******************************************************************/
+
+/***********************************************************
+Creates a 64-bit dulint out of two ulints. */
+UNIV_INLINE
+dulint
+ut_dulint_create(
+/*=============*/
+ /* out: created dulint */
+ ulint high, /* in: high-order 32 bits */
+ ulint low) /* in: low-order 32 bits */
+{
+ dulint res;
+
+ ut_ad(high <= 0xFFFFFFFF);
+ ut_ad(low <= 0xFFFFFFFF);
+
+ res.high = high;
+ res.low = low;
+
+ return(res);
+}
+
+/***********************************************************
+Gets the high-order 32 bits of a dulint. */
+UNIV_INLINE
+ulint
+ut_dulint_get_high(
+/*===============*/
+ /* out: 32 bits in ulint */
+ dulint d) /* in: dulint */
+{
+ return(d.high);
+}
+
+/***********************************************************
+Gets the low-order 32 bits of a dulint. */
+UNIV_INLINE
+ulint
+ut_dulint_get_low(
+/*==============*/
+ /* out: 32 bits in ulint */
+ dulint d) /* in: dulint */
+{
+ return(d.low);
+}
+
+/***********************************************************
+Tests if a dulint is zero. */
+UNIV_INLINE
+ibool
+ut_dulint_is_zero(
+/*==============*/
+ /* out: TRUE if zero */
+ dulint a) /* in: dulint */
+{
+ if ((a.low == 0) && (a.high == 0)) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/***********************************************************
+Compares two dulints. */
+UNIV_INLINE
+int
+ut_dulint_cmp(
+/*==========*/
+ /* out: -1 if a < b, 0 if a == b,
+ 1 if a > b */
+ dulint a, /* in: dulint */
+ dulint b) /* in: dulint */
+{
+ if (a.high > b.high) {
+ return(1);
+ } else if (a.high < b.high) {
+ return(-1);
+ } else if (a.low > b.low) {
+ return(1);
+ } else if (a.low < b.low) {
+ return(-1);
+ } else {
+ return(0);
+ }
+}
+
+/***********************************************************
+Calculates the max of two dulints. */
+UNIV_INLINE
+dulint
+ut_dulint_get_max(
+/*==============*/
+ /* out: max(a, b) */
+ dulint a, /* in: dulint */
+ dulint b) /* in: dulint */
+{
+ if (ut_dulint_cmp(a, b) > 0) {
+
+ return(a);
+ }
+
+ return(b);
+}
+
+/***********************************************************
+Calculates the min of two dulints. */
+UNIV_INLINE
+dulint
+ut_dulint_get_min(
+/*==============*/
+ /* out: min(a, b) */
+ dulint a, /* in: dulint */
+ dulint b) /* in: dulint */
+{
+ if (ut_dulint_cmp(a, b) > 0) {
+
+ return(b);
+ }
+
+ return(a);
+}
+
+/***********************************************************
+Adds a ulint to a dulint. */
+UNIV_INLINE
+dulint
+ut_dulint_add(
+/*==========*/
+ /* out: sum a + b */
+ dulint a, /* in: dulint */
+ ulint b) /* in: ulint */
+{
+ if (0xFFFFFFFF - b >= a.low) {
+ a.low += b;
+
+ return(a);
+ }
+
+ a.low = a.low - (0xFFFFFFFF - b) - 1;
+
+ a.high++;
+
+ return(a);
+}
+
+/***********************************************************
+Subtracts a ulint from a dulint. */
+UNIV_INLINE
+dulint
+ut_dulint_subtract(
+/*===============*/
+ /* out: a - b */
+ dulint a, /* in: dulint */
+ ulint b) /* in: ulint, b <= a */
+{
+ if (a.low >= b) {
+ a.low -= b;
+
+ return(a);
+ }
+
+ b -= a.low + 1;
+
+ a.low = 0xFFFFFFFF - b;
+
+ ut_ad(a.high > 0);
+
+ a.high--;
+
+ return(a);
+}
+
+/***********************************************************
+Subtracts a dulint from another. NOTE that the difference must be positive
+and smaller than 4G. */
+UNIV_INLINE
+ulint
+ut_dulint_minus(
+/*============*/
+ /* out: a - b */
+ dulint a, /* in: dulint; NOTE a must be >= b and at most
+ 2 to power 32 - 1 greater */
+ dulint b) /* in: dulint */
+{
+ ulint diff;
+
+ if (a.high == b.high) {
+ ut_ad(a.low >= b.low);
+
+ return(a.low - b.low);
+ }
+
+ ut_ad(a.high == b.high + 1);
+
+ diff = (ulint)(0xFFFFFFFF - b.low);
+ diff += 1 + a.low;
+
+ ut_ad(diff > a.low);
+
+ return(diff);
+}
+
+/************************************************************
+Rounds a dulint downward to a multiple of a power of 2. */
+UNIV_INLINE
+dulint
+ut_dulint_align_down(
+/*=================*/
+ /* out: rounded value */
+ dulint n, /* in: number to be rounded */
+ ulint align_no) /* in: align by this number which must be a
+ power of 2 */
+{
+ ulint low, high;
+
+ ut_ad(align_no > 0);
+ ut_ad(((align_no - 1) & align_no) == 0);
+
+ low = ut_dulint_get_low(n);
+ high = ut_dulint_get_high(n);
+
+ low = low & ~(align_no - 1);
+
+ return(ut_dulint_create(high, low));
+}
+
+/************************************************************
+Rounds a dulint upward to a multiple of a power of 2. */
+UNIV_INLINE
+dulint
+ut_dulint_align_up(
+/*===============*/
+ /* out: rounded value */
+ dulint n, /* in: number to be rounded */
+ ulint align_no) /* in: align by this number which must be a
+ power of 2 */
+{
+ return(ut_dulint_align_down(ut_dulint_add(n, align_no - 1), align_no));
+}
+
+/************************************************************
+The following function calculates the value of an integer n rounded
+to the smallest multiple of align_no which is >= n. align_no
+has to be a power of 2. */
+UNIV_INLINE
+ulint
+ut_calc_align(
+/*==========*/
+ /* out: rounded value */
+ ulint n, /* in: number to be rounded */
+ ulint align_no) /* in: align by this number */
+{
+ ut_ad(align_no > 0);
+ ut_ad(((align_no - 1) & align_no) == 0);
+
+ return((n + align_no - 1) & ~(align_no - 1));
+}
+
+/*************************************************************
+The following function rounds up a pointer to the nearest aligned address. */
+UNIV_INLINE
+void*
+ut_align(
+/*=====*/
+ /* out: aligned pointer */
+ void* ptr, /* in: pointer */
+ ulint align_no) /* in: align by this number */
+{
+ ut_ad(align_no > 0);
+ ut_ad(((align_no - 1) & align_no) == 0);
+ ut_ad(ptr);
+
+ ut_ad(sizeof(void*) == sizeof(ulint));
+
+ return((void*)((((ulint)ptr) + align_no - 1) & ~(align_no - 1)));
+}
+
+/************************************************************
+The following function calculates the value of an integer n rounded
+to the largest multiple of align_no which is <= n. align_no has to be a
+power of 2. */
+UNIV_INLINE
+ulint
+ut_calc_align_down(
+/*===============*/
+ /* out: rounded value */
+ ulint n, /* in: number to be rounded */
+ ulint align_no) /* in: align by this number */
+{
+ ut_ad(align_no > 0);
+ ut_ad(((align_no - 1) & align_no) == 0);
+
+ return(n & ~(align_no - 1));
+}
+
+/*************************************************************
+The following function rounds down a pointer to the nearest
+aligned address. */
+UNIV_INLINE
+void*
+ut_align_down(
+/*==========*/
+ /* out: aligned pointer */
+ void* ptr, /* in: pointer */
+ ulint align_no) /* in: align by this number */
+{
+ ut_ad(align_no > 0);
+ ut_ad(((align_no - 1) & align_no) == 0);
+ ut_ad(ptr);
+
+ ut_ad(sizeof(void*) == sizeof(ulint));
+
+ return((void*)((((ulint)ptr)) & ~(align_no - 1)));
+}
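
The rounding helpers above all rely on align_no being a power of 2; a couple of concrete values (illustrative only) show the effect:

    ut_a(ut_calc_align(13, 8) == 16);       /* round 13 up to a multiple of 8 */
    ut_a(ut_calc_align_down(13, 8) == 8);   /* round 13 down */
    ut_a(ut_calc_align(16, 8) == 16);       /* exact multiples are unchanged */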
+
+/*********************************************************************
+Gets the nth bit of a ulint. */
+UNIV_INLINE
+ibool
+ut_bit_get_nth(
+/*===========*/
+ /* out: TRUE if nth bit is 1; 0th bit is defined to
+ be the least significant */
+ ulint a, /* in: ulint */
+ ulint n) /* in: nth bit requested */
+{
+ ut_ad(n < 8 * sizeof(ulint));
+ ut_ad(TRUE == 1);
+
+ return(1 & (a >> n));
+}
+
+/*********************************************************************
+Sets the nth bit of a ulint. */
+UNIV_INLINE
+ulint
+ut_bit_set_nth(
+/*===========*/
+ /* out: the ulint with the bit set as requested */
+ ulint a, /* in: ulint */
+ ulint n, /* in: nth bit requested */
+ ibool val) /* in: value for the bit to set */
+{
+ ut_ad(n < 8 * sizeof(ulint));
+ ut_ad(TRUE == 1);
+
+ if (val) {
+ return((1 << n) | a);
+ } else {
+ return(~(1 << n) & a);
+ }
+}
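
A brief sketch (illustrative only) of the bit helpers above:

    ulint   word = 0;

    word = ut_bit_set_nth(word, 3, TRUE);   /* word is now 8 */
    ut_a(ut_bit_get_nth(word, 3));
    ut_a(!ut_bit_get_nth(word, 2));

    word = ut_bit_set_nth(word, 3, FALSE);  /* word is 0 again */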
diff --git a/innobase/include/ut0dbg.h b/innobase/include/ut0dbg.h
new file mode 100644
index 00000000000..cf49f4f993f
--- /dev/null
+++ b/innobase/include/ut0dbg.h
@@ -0,0 +1,78 @@
+/*********************************************************************
+Debug utilities for Innobase
+
+(c) 1994, 1995 Innobase Oy
+
+Created 1/30/1994 Heikki Tuuri
+**********************************************************************/
+
+#ifndef ut0dbg_h
+#define ut0dbg_h
+
+#include <assert.h>
+#include <stdlib.h>
+#include "univ.i"
+#include "os0thread.h"
+
+extern ulint ut_dbg_zero; /* This is used to eliminate
+ compiler warnings */
+extern ibool ut_dbg_stop_threads;
+
+extern ulint* ut_dbg_null_ptr;
+
+
+#define ut_a(EXPR)\
+{\
+ ulint dbg_i;\
+\
+ if (!((ulint)(EXPR) + ut_dbg_zero)) {\
+ /* printf(\
+ "Assertion failure in thread %lu in file %s line %lu\n",\
+ os_thread_get_curr_id(), __FILE__, (ulint)__LINE__);\
+ printf(\
+ "we generate a memory trap on purpose to start the debugger\n");*/\
+ ut_dbg_stop_threads = TRUE;\
+ dbg_i = *(ut_dbg_null_ptr);\
+ if (dbg_i) {\
+ ut_dbg_null_ptr = NULL;\
+ }\
+ }\
+ if (ut_dbg_stop_threads) {\
+ printf("Thread %lu stopped in file %s line %lu\n",\
+ os_thread_get_curr_id(), __FILE__, (ulint)__LINE__);\
+ os_thread_sleep(1000000000);\
+ }\
+}
+
+#define ut_error {\
+ ulint dbg_i;\
+ printf(\
+ "Assertion failure in thread %lu in file %s line %lu\n",\
+ os_thread_get_curr_id(), __FILE__, (ulint)__LINE__);\
+ printf("Generates memory trap on purpose for stack debugging\n");\
+ ut_dbg_stop_threads = TRUE;\
+ dbg_i = *(ut_dbg_null_ptr);\
+ printf("%lu", dbg_i);\
+}
+
+
+
+#ifdef UNIV_DEBUG
+#define ut_ad(EXPR) ut_a(EXPR)
+#define ut_d(EXPR) {EXPR;}
+#else
+#define ut_ad(EXPR)
+#define ut_d(EXPR)
+#endif
+
+
+#define UT_NOT_USED(A) A = A
+
+
+
+
+
+
+
+#endif
+
diff --git a/innobase/include/ut0lst.h b/innobase/include/ut0lst.h
new file mode 100644
index 00000000000..d290c476963
--- /dev/null
+++ b/innobase/include/ut0lst.h
@@ -0,0 +1,215 @@
+/**********************************************************************
+List utilities
+
+(c) 1995 Innobase Oy
+
+Created 9/10/1995 Heikki Tuuri
+***********************************************************************/
+
+#ifndef ut0lst_h
+#define ut0lst_h
+
+#include "univ.i"
+
+/* This module implements the two-way linear list which should be used
+if a list is used in the database. Note that a single struct may belong
+to two or more lists, provided that the list are given different names.
+An example of the usage of the lists can be found in fil0fil.c. */
+
+/***********************************************************************
+This macro expands to the unnamed type definition of a struct which acts
+as the two-way list base node. The base node contains pointers
+to both ends of the list and a count of nodes in the list (excluding
+the base node from the count). TYPE should be the list node type name. */
+
+#define UT_LIST_BASE_NODE_T(TYPE)\
+struct {\
+ ulint count; /* count of nodes in list */\
+ TYPE * start; /* pointer to list start, NULL if empty */\
+ TYPE * end; /* pointer to list end, NULL if empty */\
+}\
+
+/***********************************************************************
+This macro expands to the unnamed type definition of a struct which
+should be embedded in the nodes of the list; the node type must be a struct.
+This struct contains the pointers to next and previous nodes in the list.
+The name of the field in the node struct should be the name given
+to the list. TYPE should be the list node type name. Example of usage:
+
+typedef struct LRU_node_struct LRU_node_t;
+struct LRU_node_struct {
+ UT_LIST_NODE_T(LRU_node_t) LRU_list;
+ ...
+}
+The example implements an LRU list of name LRU_list. Its nodes are of type
+LRU_node_t.
+*/
+
+#define UT_LIST_NODE_T(TYPE)\
+struct {\
+ TYPE * prev; /* pointer to the previous node,\
+ NULL if start of list */\
+ TYPE * next; /* pointer to next node, NULL if end of list */\
+}\
+
+/***********************************************************************
+Initializes the base node of a two-way list. */
+
+#define UT_LIST_INIT(BASE)\
+{\
+ (BASE).count = 0;\
+ (BASE).start = NULL;\
+ (BASE).end = NULL;\
+}\
+
+/***********************************************************************
+Adds the node as the first element in a two-way linked list.
+BASE has to be the base node (not a pointer to it). N has to be
+the pointer to the node to be added to the list. NAME is the list name. */
+
+#define UT_LIST_ADD_FIRST(NAME, BASE, N)\
+{\
+ ut_ad(N);\
+ ((BASE).count)++;\
+ ((N)->NAME).next = (BASE).start;\
+ ((N)->NAME).prev = NULL;\
+ if ((BASE).start != NULL) {\
+ (((BASE).start)->NAME).prev = (N);\
+ }\
+ (BASE).start = (N);\
+ if ((BASE).end == NULL) {\
+ (BASE).end = (N);\
+ }\
+}\
+
+/***********************************************************************
+Adds the node as the last element in a two-way linked list.
+BASE has to be the base node (not a pointer to it). N has to be
+the pointer to the node to be added to the list. NAME is the list name. */
+
+#define UT_LIST_ADD_LAST(NAME, BASE, N)\
+{\
+ ut_ad(N);\
+ ((BASE).count)++;\
+ ((N)->NAME).prev = (BASE).end;\
+ ((N)->NAME).next = NULL;\
+ if ((BASE).end != NULL) {\
+ (((BASE).end)->NAME).next = (N);\
+ }\
+ (BASE).end = (N);\
+ if ((BASE).start == NULL) {\
+ (BASE).start = (N);\
+ }\
+}\
+
+/***********************************************************************
+Inserts a NODE2 after NODE1 in a list.
+BASE has to be the base node (not a pointer to it). NAME is the list
+name, NODE1 and NODE2 are pointers to nodes. */
+
+#define UT_LIST_INSERT_AFTER(NAME, BASE, NODE1, NODE2)\
+{\
+ ut_ad(NODE1);\
+ ut_ad(NODE2);\
+ ((BASE).count)++;\
+ ((NODE2)->NAME).prev = (NODE1);\
+ ((NODE2)->NAME).next = ((NODE1)->NAME).next;\
+ if (((NODE1)->NAME).next != NULL) {\
+ ((((NODE1)->NAME).next)->NAME).prev = (NODE2);\
+ }\
+ ((NODE1)->NAME).next = (NODE2);\
+ if ((BASE).end == (NODE1)) {\
+ (BASE).end = (NODE2);\
+ }\
+}\
+
+/***********************************************************************
+Removes a node from a two-way linked list. BASE has to be the base node
+(not a pointer to it). N has to be the pointer to the node to be removed
+from the list. NAME is the list name. */
+
+#define UT_LIST_REMOVE(NAME, BASE, N)\
+{\
+ ut_ad(N);\
+ ut_a((BASE).count > 0);\
+ ((BASE).count)--;\
+ if (((N)->NAME).next != NULL) {\
+ ((((N)->NAME).next)->NAME).prev = ((N)->NAME).prev;\
+ } else {\
+ (BASE).end = ((N)->NAME).prev;\
+ }\
+ if (((N)->NAME).prev != NULL) {\
+ ((((N)->NAME).prev)->NAME).next = ((N)->NAME).next;\
+ } else {\
+ (BASE).start = ((N)->NAME).next;\
+ }\
+}\
+
+/************************************************************************
+Gets the next node in a two-way list. NAME is the name of the list
+and N is pointer to a node. */
+
+#define UT_LIST_GET_NEXT(NAME, N)\
+ (((N)->NAME).next)
+
+/************************************************************************
+Gets the previous node in a two-way list. NAME is the name of the list
+and N is pointer to a node. */
+
+#define UT_LIST_GET_PREV(NAME, N)\
+ (((N)->NAME).prev)
+
+/************************************************************************
+Alternative macro to get the number of nodes in a two-way list, i.e.,
+its length. BASE is the base node (not a pointer to it). */
+
+#define UT_LIST_GET_LEN(BASE)\
+ (BASE).count
+
+/************************************************************************
+Gets the first node in a two-way list, or returns NULL,
+if the list is empty. BASE is the base node (not a pointer to it). */
+
+#define UT_LIST_GET_FIRST(BASE)\
+ (BASE).start
+
+/************************************************************************
+Gets the last node in a two-way list, or returns NULL,
+if the list is empty. BASE is the base node (not a pointer to it). */
+
+#define UT_LIST_GET_LAST(BASE)\
+ (BASE).end
+
+/************************************************************************
+Checks the consistency of a two-way list. NAME is the name of the list,
+TYPE is the node type, and BASE is the base node (not a pointer to it). */
+
+#define UT_LIST_VALIDATE(NAME, TYPE, BASE)\
+{\
+ ulint ut_list_i_313;\
+ TYPE * ut_list_node_313;\
+\
+ ut_list_node_313 = (BASE).start;\
+\
+ for (ut_list_i_313 = 0; ut_list_i_313 < (BASE).count;\
+ ut_list_i_313++) {\
+ ut_a(ut_list_node_313);\
+ ut_list_node_313 = (ut_list_node_313->NAME).next;\
+ }\
+\
+ ut_a(ut_list_node_313 == NULL);\
+\
+ ut_list_node_313 = (BASE).end;\
+\
+ for (ut_list_i_313 = 0; ut_list_i_313 < (BASE).count;\
+ ut_list_i_313++) {\
+ ut_a(ut_list_node_313);\
+ ut_list_node_313 = (ut_list_node_313->NAME).prev;\
+ }\
+\
+ ut_a(ut_list_node_313 == NULL);\
+}\
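
Continuing the LRU_node_t example given in the comment above, a usage sketch (illustrative only; node1 and node2 are assumed to point to allocated LRU_node_t instances) could read:

    UT_LIST_BASE_NODE_T(LRU_node_t) base;
    LRU_node_t*                     node;

    UT_LIST_INIT(base);
    UT_LIST_ADD_LAST(LRU_list, base, node1);
    UT_LIST_ADD_LAST(LRU_list, base, node2);

    for (node = UT_LIST_GET_FIRST(base); node != NULL;
         node = UT_LIST_GET_NEXT(LRU_list, node)) {
            /* ... visit node ... */
    }

    ut_a(UT_LIST_GET_LEN(base) == 2);
    UT_LIST_VALIDATE(LRU_list, LRU_node_t, base);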
+
+
+#endif
+
diff --git a/innobase/include/ut0mem.h b/innobase/include/ut0mem.h
new file mode 100644
index 00000000000..4d266f34c17
--- /dev/null
+++ b/innobase/include/ut0mem.h
@@ -0,0 +1,64 @@
+/***********************************************************************
+Memory primitives
+
+(c) 1994, 1995 Innobase Oy
+
+Created 5/30/1994 Heikki Tuuri
+************************************************************************/
+
+#ifndef ut0mem_h
+#define ut0mem_h
+
+#include <string.h>
+#include <stdlib.h>
+#include "univ.i"
+
+UNIV_INLINE
+void*
+ut_memcpy(void* dest, void* sour, ulint n);
+
+UNIV_INLINE
+void*
+ut_memmove(void* dest, void* sour, ulint n);
+
+UNIV_INLINE
+int
+ut_memcmp(void* str1, void* str2, ulint n);
+
+
+void*
+ut_malloc(ulint n);
+
+UNIV_INLINE
+void
+ut_free(void* ptr);
+
+UNIV_INLINE
+char*
+ut_strcpy(char* dest, char* sour);
+
+UNIV_INLINE
+ulint
+ut_strlen(char* str);
+
+UNIV_INLINE
+int
+ut_strcmp(void* str1, void* str2);
+
+/**************************************************************************
+Catenates two strings into newly allocated memory. The memory must be freed
+using mem_free. */
+
+char*
+ut_str_catenate(
+/*============*/
+ /* out, own: catenated null-terminated string */
+ char* str1, /* in: null-terminated string */
+ char* str2); /* in: null-terminated string */
+
+#ifndef UNIV_NONINL
+#include "ut0mem.ic"
+#endif
+
+#endif
+
diff --git a/innobase/include/ut0mem.ic b/innobase/include/ut0mem.ic
new file mode 100644
index 00000000000..fc4b6bd8be5
--- /dev/null
+++ b/innobase/include/ut0mem.ic
@@ -0,0 +1,57 @@
+/***********************************************************************
+Memory primitives
+
+(c) 1994, 1995 Innobase Oy
+
+Created 5/30/1994 Heikki Tuuri
+************************************************************************/
+
+UNIV_INLINE
+void*
+ut_memcpy(void* dest, void* sour, ulint n)
+{
+ return(memcpy(dest, sour, n));
+}
+
+UNIV_INLINE
+void*
+ut_memmove(void* dest, void* sour, ulint n)
+{
+ return(memmove(dest, sour, n));
+}
+
+UNIV_INLINE
+int
+ut_memcmp(void* str1, void* str2, ulint n)
+{
+ return(memcmp(str1, str2, n));
+}
+
+UNIV_INLINE
+void
+ut_free(void* ptr)
+{
+ free(ptr);
+}
+
+UNIV_INLINE
+char*
+ut_strcpy(char* dest, char* sour)
+{
+ return(strcpy(dest, sour));
+}
+
+UNIV_INLINE
+ulint
+ut_strlen(char* str)
+{
+ return(strlen(str));
+}
+
+UNIV_INLINE
+int
+ut_strcmp(void* str1, void* str2)
+{
+ return(strcmp((char*)str1, (char*)str2));
+}
+
diff --git a/innobase/include/ut0rnd.h b/innobase/include/ut0rnd.h
new file mode 100644
index 00000000000..a30251e6da0
--- /dev/null
+++ b/innobase/include/ut0rnd.h
@@ -0,0 +1,121 @@
+/**********************************************************************
+Random numbers and hashing
+
+(c) 1994, 1995 Innobase Oy
+
+Created 1/20/1994 Heikki Tuuri
+***********************************************************************/
+
+#ifndef ut0rnd_h
+#define ut0rnd_h
+
+#include "univ.i"
+
+#include "ut0byte.h"
+
+/* The 'character code' for end of field or string (used
+in folding records) */
+#define UT_END_OF_FIELD 257
+
+/************************************************************
+This is used to set the random number seed. */
+UNIV_INLINE
+void
+ut_rnd_set_seed(
+/*============*/
+ ulint seed); /* in: seed */
+/************************************************************
+The following function generates a series of 'random' ulint integers. */
+UNIV_INLINE
+ulint
+ut_rnd_gen_next_ulint(
+/*==================*/
+ /* out: the next 'random' number */
+ ulint rnd); /* in: the previous random number value */
+/*************************************************************
+The following function generates 'random' ulint integers which
+enumerate the value space (let there be N of them) of ulint integers
+in a pseudo-random fashion. Note that the same integer repeats only
+after N calls to the generator. */
+UNIV_INLINE
+ulint
+ut_rnd_gen_ulint(void);
+/*==================*/
+ /* out: the 'random' number */
+/************************************************************
+Generates a random integer from a given interval. */
+UNIV_INLINE
+ulint
+ut_rnd_interval(
+/*============*/
+ /* out: the 'random' number */
+ ulint low, /* in: low limit; can generate also this value */
+ ulint high); /* in: high limit; can generate also this value */
+/*************************************************************
+Generates a random ibool value. */
+UNIV_INLINE
+ibool
+ut_rnd_gen_ibool(void);
+/*=================*/
+ /* out: the random value */
+/***********************************************************
+The following function generates a hash value for a ulint integer
+to a hash table of size table_size, which should be a prime or some
+random number to work reliably. */
+UNIV_INLINE
+ulint
+ut_hash_ulint(
+/*=========*/
+ /* out: hash value */
+ ulint key, /* in: value to be hashed */
+ ulint table_size); /* in: hash table size */
+/*****************************************************************
+Folds a pair of ulints. */
+UNIV_INLINE
+ulint
+ut_fold_ulint_pair(
+/*===============*/
+ /* out: folded value */
+ ulint n1, /* in: ulint */
+ ulint n2); /* in: ulint */
+/*****************************************************************
+Folds a dulint. */
+UNIV_INLINE
+ulint
+ut_fold_dulint(
+/*===========*/
+ /* out: folded value */
+ dulint d); /* in: dulint */
+/*****************************************************************
+Folds a character string ending in the null character. */
+UNIV_INLINE
+ulint
+ut_fold_string(
+/*===========*/
+ /* out: folded value */
+ char* str); /* in: null-terminated string */
+/*****************************************************************
+Folds a binary string. */
+UNIV_INLINE
+ulint
+ut_fold_binary(
+/*===========*/
+ /* out: folded value */
+ byte* str, /* in: string of bytes */
+ ulint len); /* in: length */
+/***************************************************************
+Looks for a prime number slightly greater than the given argument.
+The prime is chosen so that it is not near any power of 2. */
+
+ulint
+ut_find_prime(
+/*==========*/
+ /* out: prime */
+ ulint n); /* in: positive number > 100 */
+
+
+#ifndef UNIV_NONINL
+#include "ut0rnd.ic"
+#endif
+
+#endif
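
Taken together, ut_find_prime() and the hash/fold functions give the usual recipe for sizing and addressing a hash table. A minimal sketch with hypothetical values, assuming only the declarations above:

    ulint   n_expected = 10000;                     /* expected number of entries */
    ulint   n_cells    = ut_find_prime(n_expected); /* prime, not near a power of 2 */

    ulint   int_slot = ut_hash_ulint((ulint)4711, n_cells);        /* integer key */
    ulint   str_slot = ut_fold_string("SYS_TABLES") % n_cells;     /* string key */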
diff --git a/innobase/include/ut0rnd.ic b/innobase/include/ut0rnd.ic
new file mode 100644
index 00000000000..e166a26fe86
--- /dev/null
+++ b/innobase/include/ut0rnd.ic
@@ -0,0 +1,222 @@
+/******************************************************************
+Random numbers and hashing
+
+(c) 1994, 1995 Innobase Oy
+
+Created 5/30/1994 Heikki Tuuri
+*******************************************************************/
+
+#define UT_HASH_RANDOM_MASK 1463735687
+#define UT_HASH_RANDOM_MASK2 1653893711
+#define UT_RND1 151117737
+#define UT_RND2 119785373
+#define UT_RND3 85689495
+#define UT_RND4 76595339
+#define UT_SUM_RND2 98781234
+#define UT_SUM_RND3 126792457
+#define UT_SUM_RND4 63498502
+#define UT_XOR_RND1 187678878
+#define UT_XOR_RND2 143537923
+
+extern ulint ut_rnd_ulint_counter;
+
+/************************************************************
+This is used to set the random number seed. */
+UNIV_INLINE
+void
+ut_rnd_set_seed(
+/*============*/
+ ulint seed) /* in: seed */
+{
+ ut_rnd_ulint_counter = seed;
+}
+
+/************************************************************
+The following function generates a series of 'random' ulint integers. */
+UNIV_INLINE
+ulint
+ut_rnd_gen_next_ulint(
+/*==================*/
+ /* out: the next 'random' number */
+ ulint rnd) /* in: the previous random number value */
+{
+ ulint n_bits;
+
+ n_bits = 8 * sizeof(ulint);
+
+ rnd = UT_RND2 * rnd + UT_SUM_RND3;
+ rnd = UT_XOR_RND1 ^ rnd;
+ rnd = (rnd << 20) + (rnd >> (n_bits - 20));
+ rnd = UT_RND3 * rnd + UT_SUM_RND4;
+ rnd = UT_XOR_RND2 ^ rnd;
+ rnd = (rnd << 20) + (rnd >> (n_bits - 20));
+ rnd = UT_RND1 * rnd + UT_SUM_RND2;
+
+ return(rnd);
+}
+
+/************************************************************
+The following function generates 'random' ulint integers which
+enumerate the value space of ulint integers in a pseudo-random
+fashion. Note that a given integer repeats only after
+2 to the power 32 calls to the generator (if ulint is 32-bit). */
+UNIV_INLINE
+ulint
+ut_rnd_gen_ulint(void)
+/*==================*/
+ /* out: the 'random' number */
+{
+ ulint rnd;
+ ulint n_bits;
+
+ n_bits = 8 * sizeof(ulint);
+
+ ut_rnd_ulint_counter =
+ UT_RND1 * ut_rnd_ulint_counter + UT_RND2;
+
+ rnd = ut_rnd_gen_next_ulint(ut_rnd_ulint_counter);
+
+ return(rnd);
+}
+
+/************************************************************
+Generates a random integer from a given interval. */
+UNIV_INLINE
+ulint
+ut_rnd_interval(
+/*============*/
+ /* out: the 'random' number */
+ ulint low, /* in: low limit; can generate also this value */
+ ulint high) /* in: high limit; can generate also this value */
+{
+ ulint rnd;
+
+ ut_ad(high >= low);
+
+ if (low == high) {
+
+ return(low);
+ }
+
+ rnd = ut_rnd_gen_ulint();
+
+ return(low + (rnd % (high - low + 1)));
+}
+
+/*************************************************************
+Generates a random ibool value. */
+UNIV_INLINE
+ibool
+ut_rnd_gen_ibool(void)
+/*=================*/
+ /* out: the random value */
+{
+ ulint x;
+
+ x = ut_rnd_gen_ulint();
+
+ if (((x >> 20) + (x >> 15)) & 1) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/***********************************************************
+The following function generates a hash value for a ulint integer
+to a hash table of size table_size, which should be a prime
+or some random number for the hash table to work reliably. */
+UNIV_INLINE
+ulint
+ut_hash_ulint(
+/*=========*/
+ /* out: hash value */
+ ulint key, /* in: value to be hashed */
+ ulint table_size) /* in: hash table size */
+{
+ key = key ^ UT_HASH_RANDOM_MASK2;
+
+ return(key % table_size);
+}
+
+/*****************************************************************
+Folds a pair of ulints. */
+UNIV_INLINE
+ulint
+ut_fold_ulint_pair(
+/*===============*/
+ /* out: folded value */
+ ulint n1, /* in: ulint */
+ ulint n2) /* in: ulint */
+{
+ return(((((n1 ^ n2 ^ UT_HASH_RANDOM_MASK2) << 8) + n1)
+ ^ UT_HASH_RANDOM_MASK) + n2);
+}
+
+/*****************************************************************
+Folds a dulint. */
+UNIV_INLINE
+ulint
+ut_fold_dulint(
+/*===========*/
+ /* out: folded value */
+ dulint d) /* in: dulint */
+{
+ return(ut_fold_ulint_pair(ut_dulint_get_low(d),
+ ut_dulint_get_high(d)));
+}
+
+/*****************************************************************
+Folds a character string ending in the null character. */
+UNIV_INLINE
+ulint
+ut_fold_string(
+/*===========*/
+ /* out: folded value */
+ char* str) /* in: null-terminated string */
+{
+ #ifdef UNIV_DEBUG
+ ulint i = 0;
+ #endif
+ ulint fold = 0;
+
+ ut_ad(str);
+
+ while (*str != '\0') {
+
+ #ifdef UNIV_DEBUG
+ i++;
+ ut_a(i < 100);
+ #endif
+
+ fold = ut_fold_ulint_pair(fold, (ulint)(*str));
+ str++;
+ }
+
+ return(fold);
+}
+
+/*****************************************************************
+Folds a binary string. */
+UNIV_INLINE
+ulint
+ut_fold_binary(
+/*===========*/
+ /* out: folded value */
+ byte* str, /* in: string of bytes */
+ ulint len) /* in: length */
+{
+ ulint i;
+ ulint fold = 0;
+
+ ut_ad(str);
+
+ for (i = 0; i < len; i++) {
+ fold = ut_fold_ulint_pair(fold, (ulint)(*str));
+
+ str++;
+ }
+
+ return(fold);
+}
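
Because ut_rnd_ulint_counter is the generator's only state, a given seed always reproduces the same stream, and ut_rnd_interval() never strays outside its limits. A minimal sketch of both properties, assuming ut_a() from ut0dbg.h:

    ulint   a;
    ulint   b;

    ut_rnd_set_seed(98765);
    a = ut_rnd_gen_ulint();

    ut_rnd_set_seed(98765);
    b = ut_rnd_gen_ulint();

    ut_a(a == b);                           /* same seed, same first value */
    ut_a(ut_rnd_interval(10, 20) >= 10);    /* stays inside the interval */
    ut_a(ut_rnd_interval(10, 20) <= 20);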
diff --git a/innobase/include/ut0sort.h b/innobase/include/ut0sort.h
new file mode 100644
index 00000000000..d0a3d34e79e
--- /dev/null
+++ b/innobase/include/ut0sort.h
@@ -0,0 +1,91 @@
+/**********************************************************************
+Sort utility
+
+(c) 1995 Innobase Oy
+
+Created 11/9/1995 Heikki Tuuri
+***********************************************************************/
+
+#ifndef ut0sort_h
+#define ut0sort_h
+
+#include "univ.i"
+
+/* This module gives a macro definition of the body of
+a standard sort function for an array of elements of any
+type. The comparison function is given as a parameter to
+the macro. The sort algorithm is mergesort, which has an
+O(n log n) worst case.
+*/
+
+/***********************************************************************
+This macro expands to the body of a standard sort function.
+The sort function uses mergesort and must be defined separately
+for each type of array.
+Also the comparison function has to be defined individually
+for each array cell type. SORT_FUN is the sort function name.
+The function takes the array to be sorted (ARR),
+the array of auxiliary space (AUX_ARR) of same size,
+and the low (LOW), inclusive, and high (HIGH), noninclusive,
+limits for the sort interval as arguments.
+CMP_FUN is the comparison function name. It takes as arguments
+two elements from the array and returns 1 if the first is bigger,
+0 if they are equal, and -1 if the second is bigger. For an example
+of use, see the test program in tsut.c. */
+
+#define UT_SORT_FUNCTION_BODY(SORT_FUN, ARR, AUX_ARR, LOW, HIGH, CMP_FUN)\
+{\
+ ulint ut_sort_mid77;\
+ ulint ut_sort_i77;\
+ ulint ut_sort_low77;\
+ ulint ut_sort_high77;\
+\
+ ut_ad((LOW) < (HIGH));\
+ ut_ad(ARR);\
+ ut_ad(AUX_ARR);\
+\
+ if ((LOW) == (HIGH) - 1) {\
+ return;\
+ } else if ((LOW) == (HIGH) - 2) {\
+ if (CMP_FUN((ARR)[LOW], (ARR)[(HIGH) - 1]) > 0) {\
+ (AUX_ARR)[LOW] = (ARR)[LOW];\
+ (ARR)[LOW] = (ARR)[(HIGH) - 1];\
+ (ARR)[(HIGH) - 1] = (AUX_ARR)[LOW];\
+ }\
+ return;\
+ }\
+\
+ ut_sort_mid77 = ((LOW) + (HIGH)) / 2;\
+\
+ SORT_FUN((ARR), (AUX_ARR), (LOW), ut_sort_mid77);\
+ SORT_FUN((ARR), (AUX_ARR), ut_sort_mid77, (HIGH));\
+\
+ ut_sort_low77 = (LOW);\
+ ut_sort_high77 = ut_sort_mid77;\
+\
+ for (ut_sort_i77 = (LOW); ut_sort_i77 < (HIGH); ut_sort_i77++) {\
+\
+ if (ut_sort_low77 >= ut_sort_mid77) {\
+ (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_high77];\
+ ut_sort_high77++;\
+ } else if (ut_sort_high77 >= (HIGH)) {\
+ (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_low77];\
+ ut_sort_low77++;\
+ } else if (CMP_FUN((ARR)[ut_sort_low77],\
+ (ARR)[ut_sort_high77]) > 0) {\
+ (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_high77];\
+ ut_sort_high77++;\
+ } else {\
+ (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_low77];\
+ ut_sort_low77++;\
+ }\
+ }\
+\
+ for (ut_sort_i77 = (LOW); ut_sort_i77 < (HIGH); ut_sort_i77++) {\
+ (ARR)[ut_sort_i77] = (AUX_ARR)[ut_sort_i77];\
+ }\
+}\
+
+
+#endif
+
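To make the calling convention concrete, a sort function is defined by wrapping the macro in a function whose own name is passed as SORT_FUN, so that the recursive calls resolve back to it. The sketch below is illustrative only; my_ulint_sort is a hypothetical name, and it reuses ut_ulint_cmp() declared in ut0ut.h as the comparison function:

    #include "ut0ut.h"      /* ut_ulint_cmp() */
    #include "ut0sort.h"

    void
    my_ulint_sort(ulint* arr, ulint* aux_arr, ulint low, ulint high)
    /*===========================================================*/
    {
            UT_SORT_FUNCTION_BODY(my_ulint_sort, arr, aux_arr, low, high,
                                    ut_ulint_cmp);
    }
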
diff --git a/innobase/include/ut0ut.h b/innobase/include/ut0ut.h
new file mode 100644
index 00000000000..05d4f455c58
--- /dev/null
+++ b/innobase/include/ut0ut.h
@@ -0,0 +1,174 @@
+/**********************************************************************
+Various utilities
+
+(c) 1994, 1995 Innobase Oy
+
+Created 1/20/1994 Heikki Tuuri
+***********************************************************************/
+
+#ifndef ut0ut_h
+#define ut0ut_h
+
+#include <time.h>
+#include <ctype.h>
+
+#include "univ.i"
+
+
+typedef time_t ib_time_t;
+
+/**********************************************************
+Calculates the minimum of two ulints. */
+UNIV_INLINE
+ulint
+ut_min(
+/*===*/
+ /* out: minimum */
+ ulint n1, /* in: first number */
+ ulint n2); /* in: second number */
+/**********************************************************
+Calculates the maximum of two ulints. */
+UNIV_INLINE
+ulint
+ut_max(
+/*===*/
+ /* out: maximum */
+ ulint n1, /* in: first number */
+ ulint n2); /* in: second number */
+/********************************************************************
+Calculates minimum of two ulint-pairs. */
+UNIV_INLINE
+void
+ut_pair_min(
+/*========*/
+ ulint* a, /* out: more significant part of minimum */
+ ulint* b, /* out: less significant part of minimum */
+ ulint a1, /* in: more significant part of first pair */
+ ulint b1, /* in: less significant part of first pair */
+ ulint a2, /* in: more significant part of second pair */
+ ulint b2); /* in: less significant part of second pair */
+/**********************************************************
+Compares two ulints. */
+UNIV_INLINE
+int
+ut_ulint_cmp(
+/*=========*/
+ /* out: 1 if a > b, 0 if a == b, -1 if a < b */
+ ulint a, /* in: ulint */
+ ulint b); /* in: ulint */
+/***********************************************************
+Compares two pairs of ulints. */
+UNIV_INLINE
+int
+ut_pair_cmp(
+/*========*/
+ /* out: -1 if a < b, 0 if a == b,
+ 1 if a > b */
+ ulint a1, /* in: more significant part of first pair */
+ ulint a2, /* in: less significant part of first pair */
+ ulint b1, /* in: more significant part of second pair */
+ ulint b2); /* in: less significant part of second pair */
+/*****************************************************************
+Calculates fast the remainder when divided by a power of two. */
+UNIV_INLINE
+ulint
+ut_2pow_remainder(
+/*==============*/ /* out: remainder */
+ ulint n, /* in: number to be divided */
+ ulint m); /* in: divisor; power of 2 */
+/*****************************************************************
+Calculates fast a value rounded to a multiple of a power of 2. */
+UNIV_INLINE
+ulint
+ut_2pow_round(
+/*==========*/ /* out: value of n rounded down to nearest
+ multiple of m */
+ ulint n, /* in: number to be rounded */
+ ulint m); /* in: divisor; power of 2 */
+/*****************************************************************
+Calculates fast the 2-logarithm of a number, rounded upward to an
+integer. */
+UNIV_INLINE
+ulint
+ut_2_log(
+/*=====*/
+ /* out: logarithm in the base 2, rounded upward */
+	ulint n);	/* in: number != 0 */
+/*****************************************************************
+Calculates 2 to power n. */
+UNIV_INLINE
+ulint
+ut_2_exp(
+/*=====*/
+ /* out: 2 to power n */
+ ulint n); /* in: number */
+/*****************************************************************
+Calculates fast the number rounded up to the nearest power of 2. */
+UNIV_INLINE
+ulint
+ut_2_power_up(
+/*==========*/
+ /* out: first power of 2 which is >= n */
+ ulint n); /* in: number != 0 */
+/****************************************************************
+Sort function for ulint arrays. */
+
+void
+ut_ulint_sort(ulint* arr, ulint* aux_arr, ulint low, ulint high);
+/*============================================================*/
+/************************************************************
+The following function returns a clock time in milliseconds. */
+
+ulint
+ut_clock(void);
+/*==========*/
+/**************************************************************
+Returns system time. We do not specify the format of the time returned:
+the only way to manipulate it is to use the function ut_difftime. */
+
+ib_time_t
+ut_time(void);
+/*=========*/
+/**************************************************************
+Returns the difference of two times in seconds. */
+
+double
+ut_difftime(
+/*========*/
+ /* out: time2 - time1 expressed in seconds */
+ ib_time_t time2, /* in: time */
+ ib_time_t time1); /* in: time */
+/*****************************************************************
+Runs an idle loop on CPU. The argument gives the desired delay
+in microseconds on 100 MHz Pentium + Visual C++. */
+
+ulint
+ut_delay(
+/*=====*/
+ /* out: dummy value */
+ ulint delay); /* in: delay in microseconds on 100 MHz Pentium */
+/*****************************************************************
+Prints the contents of a memory buffer in hex and ascii. */
+
+void
+ut_print_buf(
+/*=========*/
+ byte* buf, /* in: memory buffer */
+ ulint len); /* in: length of the buffer */
+/*****************************************************************
+Prints the contents of a memory buffer in hex and ascii to an output string. */
+
+ulint
+ut_sprintf_buf(
+/*===========*/
+ /* out: printed length in bytes */
+ char* str, /* in: buffer to print to */
+ byte* buf, /* in: memory buffer */
+ ulint len); /* in: length of the buffer */
+
+
+#ifndef UNIV_NONINL
+#include "ut0ut.ic"
+#endif
+
+#endif
+
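The power-of-two helpers are address arithmetic in disguise: rounding down to a boundary and taking the offset within it partition a value exactly. A minimal sketch, assuming UNIV_PAGE_SIZE from univ.i (a power of 2) and ut_a() from ut0dbg.h:

    ulint   offset   = 123456;
    ulint   page_top = ut_2pow_round(offset, UNIV_PAGE_SIZE);     /* start of the page */
    ulint   in_page  = ut_2pow_remainder(offset, UNIV_PAGE_SIZE); /* offset within it */

    ut_a(page_top + in_page == offset);
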
diff --git a/innobase/include/ut0ut.ic b/innobase/include/ut0ut.ic
new file mode 100644
index 00000000000..90f25d2b382
--- /dev/null
+++ b/innobase/include/ut0ut.ic
@@ -0,0 +1,196 @@
+/******************************************************************
+Various utilities
+
+(c) 1994, 1995 Innobase Oy
+
+Created 5/30/1994 Heikki Tuuri
+*******************************************************************/
+
+/**********************************************************
+Calculates the minimum of two ulints. */
+UNIV_INLINE
+ulint
+ut_min(
+/*===*/
+ /* out: minimum */
+ ulint n1, /* in: first number */
+ ulint n2) /* in: second number */
+{
+ return((n1 <= n2) ? n1 : n2);
+}
+
+/**********************************************************
+Calculates the maximum of two ulints. */
+UNIV_INLINE
+ulint
+ut_max(
+/*===*/
+ /* out: maximum */
+ ulint n1, /* in: first number */
+ ulint n2) /* in: second number */
+{
+ return((n1 <= n2) ? n2 : n1);
+}
+
+/********************************************************************
+Calculates minimum of two ulint-pairs. */
+UNIV_INLINE
+void
+ut_pair_min(
+/*========*/
+ ulint* a, /* out: more significant part of minimum */
+ ulint* b, /* out: less significant part of minimum */
+ ulint a1, /* in: more significant part of first pair */
+ ulint b1, /* in: less significant part of first pair */
+ ulint a2, /* in: more significant part of second pair */
+ ulint b2) /* in: less significant part of second pair */
+{
+ if (a1 == a2) {
+ *a = a1;
+ *b = ut_min(b1, b2);
+ } else if (a1 < a2) {
+ *a = a1;
+ *b = b1;
+ } else {
+ *a = a2;
+ *b = b2;
+ }
+}
+
+/**********************************************************
+Compares two ulints. */
+UNIV_INLINE
+int
+ut_ulint_cmp(
+/*=========*/
+ /* out: 1 if a > b, 0 if a == b, -1 if a < b */
+ ulint a, /* in: ulint */
+ ulint b) /* in: ulint */
+{
+ if (a < b) {
+ return(-1);
+ } else if (a == b) {
+ return(0);
+ } else {
+ return(1);
+ }
+}
+
+/***********************************************************
+Compares two pairs of ulints. */
+UNIV_INLINE
+int
+ut_pair_cmp(
+/*========*/
+ /* out: -1 if a < b, 0 if a == b, 1 if a > b */
+ ulint a1, /* in: more significant part of first pair */
+ ulint a2, /* in: less significant part of first pair */
+ ulint b1, /* in: more significant part of second pair */
+ ulint b2) /* in: less significant part of second pair */
+{
+ if (a1 > b1) {
+ return(1);
+ } else if (a1 < b1) {
+ return(-1);
+ } else if (a2 > b2) {
+ return(1);
+ } else if (a2 < b2) {
+ return(-1);
+ } else {
+ return(0);
+ }
+}
+
+/*****************************************************************
+Calculates fast the remainder when divided by a power of two. */
+UNIV_INLINE
+ulint
+ut_2pow_remainder(
+/*==============*/ /* out: remainder */
+ ulint n, /* in: number to be divided */
+ ulint m) /* in: divisor; power of 2 */
+{
+ ut_ad(0x80000000 % m == 0);
+
+ return(n & (m - 1));
+}
+
+/*****************************************************************
+Calculates fast a value rounded to a multiple of a power of 2. */
+UNIV_INLINE
+ulint
+ut_2pow_round(
+/*==========*/ /* out: value of n rounded down to nearest
+ multiple of m */
+ ulint n, /* in: number to be rounded */
+ ulint m) /* in: divisor; power of 2 */
+{
+ ut_ad(0x80000000 % m == 0);
+
+ return(n & ~(m - 1));
+}
+
+/*****************************************************************
+Calculates fast the 2-logarithm of a number, rounded upward to an
+integer. */
+UNIV_INLINE
+ulint
+ut_2_log(
+/*=====*/
+ /* out: logarithm in the base 2, rounded upward */
+ ulint n) /* in: number != 0 */
+{
+ ulint res;
+
+ res = 0;
+
+ ut_ad(n > 0);
+
+ n = n - 1;
+
+ for (;;) {
+ n = n / 2;
+
+ if (n == 0) {
+ break;
+ }
+
+ res++;
+ }
+
+ return(res + 1);
+}
+
+/*****************************************************************
+Calculates 2 to power n. */
+UNIV_INLINE
+ulint
+ut_2_exp(
+/*=====*/
+ /* out: 2 to power n */
+ ulint n) /* in: number */
+{
+ return(1 << n);
+}
+
+/*****************************************************************
+Calculates fast the number rounded up to the nearest power of 2. */
+UNIV_INLINE
+ulint
+ut_2_power_up(
+/*==========*/
+ /* out: first power of 2 which is >= n */
+ ulint n) /* in: number != 0 */
+{
+ ulint res;
+
+ res = 1;
+
+ ut_ad(n > 0);
+
+ while (res < n) {
+ res = res * 2;
+ }
+
+ return(res);
+}
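
A few worked values pin down the rounding behaviour of the helpers above; note that ut_2_log(1) evaluates to 1 with this implementation, not 0. Written as assertions, assuming ut_a() from ut0dbg.h:

    ut_a(ut_2_log(8) == 3);
    ut_a(ut_2_log(9) == 4);                          /* rounded upward */
    ut_a(ut_2_power_up(9) == 16);
    ut_a(ut_2_exp(ut_2_log(9)) == ut_2_power_up(9)); /* both are 16 */
    ut_a(ut_2_log(1) == 1);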