Merge with 5.2.

no_error handling for select (used by INSERT ... SELECT) still needs to be fixed, but I will do that in a separate commit
author: Michael Widenius <monty@askmonty.org> 2011-12-11 11:34:44 +0200
committer: Michael Widenius <monty@askmonty.org> 2011-12-11 11:34:44 +0200
commit: 6d4224a31c9d32c8f8067a4f7d16daa29bcdee6b (patch)
tree: 79e3143528495069ad232f673532573b30afe425 /storage/xtradb
parent: 3e2cb35e11cb5ee6668d538a62a3b32e017944a5 (diff)
parent: 701c0f822abe4ee9eeafd244fa30dc2fcf067b81 (diff)
download: mariadb-git-6d4224a31c9d32c8f8067a4f7d16daa29bcdee6b.tar.gz
88 files changed, 4380 insertions, 2992 deletions
diff --git a/storage/xtradb/ChangeLog b/storage/xtradb/ChangeLog
index 102db3d7824..7a4cacb5b43 100644
--- a/storage/xtradb/ChangeLog
+++ b/storage/xtradb/ChangeLog
@@ -1,3 +1,110 @@
+2011-10-25	The InnoDB Team
+
+	* handler/ha_innodb.cc, row/row0ins.c:
+	Fix Bug#13002783 PARTIALLY UNINITIALIZED CASCADE UPDATE VECTOR
+
+2011-08-08	The InnoDB Team
+
+	* row/row0sel.c:
+	Fix Bug#12835650 VARCHAR maximum length performance impact
+
+2011-08-08	The InnoDB Team
+
+	* handler/ha_innodb.cc:
+	Fix Bug#12770537 I_S.TABLES.DATA_LENGTH DOES NOT SHOW ON-DISK SIZE
+	FOR COMPRESSED INNODB
+
+2011-07-19	The InnoDB Team
+
+	* buf/buf0buf.c, buf/buf0rea.c, handler/ha_innodb.cc,
+	include/buf0buf.h, include/buf0buf.ic, include/srv0srv.h,
+	srv/srv0srv.c:
+	Fix Bug#12356373 by reintroducing random readahead
+
+2011-06-30	The InnoDB Team
+
+	* row/row0row.c:
+	Fix Bug#12637786 Wrong secondary index entries on CHAR and VARCHAR
+	columns in ROW_FORMAT=DYNAMIC and ROW_FORMAT=COMPRESSED
+
+2011-06-16	The InnoDB Team
+
+	* btr/btr0cur.c, buf/buf0buddy.c, buf/buf0buf.c, buf/buf0lru.c,
+	include/buf0buddy.h, include/buf0buddy.ic, include/buf0buf.h,
+	include/buf0buf.ic, include/buf0lru.h, include/buf0types.h:
+	Fix Bug#61188 DROP TABLE extremely slow
+
+2011-06-16	The InnoDB Team
+
+	* buf/buf0buddy.c, buf/buf0buf.c, buf/buf0flu.c, buf/buf0lru.c,
+	include/buf0buf.h, include/buf0lru.h:
+	Fix Bug#61341 buf_LRU_insert_zip_clean can be O(N) on LRU length
+
+2011-06-16	The InnoDB Team
+
+	* page/page0zip.c, rem/rem0rec.c:
+	Fix Bug#61191 question about page_zip_available()
+
+2011-06-16	The InnoDB Team
+
+	* btr/btr0btr.c, btr/btr0cur.c, include/btr0btr.h, include/btr0cur.h,
+	include/btr0cur.ic, include/buf0buf.h, include/buf0buf.ic,
+	include/page0cur.ic, include/page0page.h, include/page0page.ic,
+	include/sync0rw.ic, include/sync0sync.h, page/page0cur.c,
+	page/page0page.c, row/row0ins.c, row/row0upd.c,
+	sync/sync0rw.c, sync/sync0sync.c:
+	Fix Bug#12612184 Race condition after btr_cur_pessimistic_update()
+
+2011-06-09	The InnoDB Team
+	* btr/btr0cur.c, include/rem0rec.h, include/rem0rec.ic,
+	row/row0row.c, row/row0vers.c, trx/trx0rec.c:
+	Instrumentation for Bug#12612184 Race condition in row_upd_clust_rec()
+
+2011-05-19	The InnoDB Team
+
+	* row/row0row.c:
+	Fix Bug#12429576 Assertion failure on purge of column prefix index
+
+2011-04-07	The InnoDB Team
+
+	* handler/ha_innodb.cc, handler/ha_innodb.h, handler/handler0alter.cc:
+	Fix Bug #52409 Assertion failure: long semaphore wait
+
+2011-04-07	The InnoDB Team
+
+	* handler/ha_innodb.cc, include/trx0trx.h, include/trx0undo.h,
+	log/log0log.c, trx/trx0sys.c, trx/trx0trx.c, trx/trx0undo.c:
+	Fix Bug #59641 Prepared XA transaction in system after hard crash
+	causes future shutdown hang
+
+2011-03-30	The InnoDB Team
+
+	* srv/srv0srv.c, sync/sync0arr.h, sync/sync0arr.c:
+	Fix Bug#11877216 InnoDB too eager to commit suicide on a busy server
+
+2011-03-15	The InnoDB Team
+
+	* btr/btr0cur.c, page/page0zip.c:
+	Fix Bug#11849231 inflateInit() invoked without initializing all memory
+
+2011-02-28	The InnoDB Team
+
+	* btr/btr0sea.c, buf/buf0buf.c, buf/buf0lru.c:
+	Fix Bug#58549 Race condition in buf_LRU_drop_page_hash_for_tablespace()
+	and compressed tables
+
+2011-02-15	The InnoDB Team
+
+	* sync/sync0rw.c, innodb_bug59307.test:
+	Bug#59307 Valgrind: uninitialized value in
+	rw_lock_set_writer_id_and_recursion_flag()
+
+2011-02-14	The InnoDB Team
+
+	* handler/handler0alter.cc:
+	Bug#59749 Enabling concurrent reads while creating non-primary
+	unique index gives failures
+
 2011-01-31	The InnoDB Team
 
 	* btr/btr0cur.c, include/row0upd.h,
diff --git a/storage/xtradb/btr/btr0btr.c b/storage/xtradb/btr/btr0btr.c
index 2fb14b06a7b..396ad422010 100644
--- a/storage/xtradb/btr/btr0btr.c
+++ b/storage/xtradb/btr/btr0btr.c
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -42,6 +42,560 @@ Created 6/2/1994 Heikki Tuuri
 #include "ibuf0ibuf.h"
 #include "trx0trx.h"
 
+#ifdef UNIV_BLOB_DEBUG
+# include "srv0srv.h"
+# include "ut0rbt.h"
+
+/** TRUE when messages about index->blobs modification are enabled. */
+static ibool btr_blob_dbg_msg;
+
+/** Issue a message about an operation on index->blobs.
+@param op	operation
+@param b	the entry being subjected to the operation
+@param ctx	the context of the operation */
+#define btr_blob_dbg_msg_issue(op, b, ctx)			\
+	fprintf(stderr, op " %u:%u:%u->%u %s(%u,%u,%u)\n",	\
+		(b)->ref_page_no, (b)->ref_heap_no,		\
+		(b)->ref_field_no, (b)->blob_page_no, ctx,	\
+		(b)->owner, (b)->always_owner, (b)->del)
+
+/** Insert to index->blobs a reference to an off-page column.
+@param index	the index tree
+@param b	the reference
+@param ctx	context (for logging) */
+UNIV_INTERN
+void
+btr_blob_dbg_rbt_insert(
+/*====================*/
+	dict_index_t*		index,	/*!< in/out: index tree */
+	const btr_blob_dbg_t*	b,	/*!< in: the reference */
+	const char*		ctx)	/*!< in: context (for logging) */
+{
+	if (btr_blob_dbg_msg) {
+		btr_blob_dbg_msg_issue("insert", b, ctx);
+	}
+	mutex_enter(&index->blobs_mutex);
+	rbt_insert(index->blobs, b, b);
+	mutex_exit(&index->blobs_mutex);
+}
+
+/** Remove from index->blobs a reference to an off-page column.
+@param index	the index tree
+@param b	the reference
+@param ctx	context (for logging) */
+UNIV_INTERN
+void
+btr_blob_dbg_rbt_delete(
+/*====================*/
+	dict_index_t*		index,	/*!< in/out: index tree */
+	const btr_blob_dbg_t*	b,	/*!< in: the reference */
+	const char*		ctx)	/*!< in: context (for logging) */
+{
+	if (btr_blob_dbg_msg) {
+		btr_blob_dbg_msg_issue("delete", b, ctx);
+	}
+	mutex_enter(&index->blobs_mutex);
+	ut_a(rbt_delete(index->blobs, b));
+	mutex_exit(&index->blobs_mutex);
+}
+
+/**************************************************************//**
+Comparator for items (btr_blob_dbg_t) in index->blobs.
+The key in index->blobs is (ref_page_no, ref_heap_no, ref_field_no).
+@return negative, 0 or positive if *a<*b, *a=*b, *a>*b */
+static
+int
+btr_blob_dbg_cmp(
+/*=============*/
+	const void*	a,	/*!< in: first btr_blob_dbg_t to compare */
+	const void*	b)	/*!< in: second btr_blob_dbg_t to compare */
+{
+	const btr_blob_dbg_t*	aa	= a;
+	const btr_blob_dbg_t*	bb	= b;
+
+	ut_ad(aa != NULL);
+	ut_ad(bb != NULL);
+
+	if (aa->ref_page_no != bb->ref_page_no) {
+		return(aa->ref_page_no < bb->ref_page_no ? -1 : 1);
+	}
+	if (aa->ref_heap_no != bb->ref_heap_no) {
+		return(aa->ref_heap_no < bb->ref_heap_no ? -1 : 1);
+	}
+	if (aa->ref_field_no != bb->ref_field_no) {
+		return(aa->ref_field_no < bb->ref_field_no ? -1 : 1);
+	}
+	return(0);
+}
+
+/**************************************************************//**
+Add a reference to an off-page column to the index->blobs map. */
+UNIV_INTERN
+void
+btr_blob_dbg_add_blob(
+/*==================*/
+	const rec_t*	rec,		/*!< in: clustered index record */
+	ulint		field_no,	/*!< in: off-page column number */
+	ulint		page_no,	/*!< in: start page of the column */
+	dict_index_t*	index,		/*!< in/out: index tree */
+	const char*	ctx)		/*!< in: context (for logging) */
+{
+	btr_blob_dbg_t	b;
+	const page_t*	page	= page_align(rec);
+
+	ut_a(index->blobs);
+
+	b.blob_page_no = page_no;
+	b.ref_page_no = page_get_page_no(page);
+	b.ref_heap_no = page_rec_get_heap_no(rec);
+	b.ref_field_no = field_no;
+	ut_a(b.ref_field_no >= index->n_uniq);
+	b.always_owner = b.owner = TRUE;
+	b.del = FALSE;
+	ut_a(!rec_get_deleted_flag(rec, page_is_comp(page)));
+	btr_blob_dbg_rbt_insert(index, &b, ctx);
+}
+
+/**************************************************************//**
+Add to index->blobs any references to off-page columns from a record.
+@return number of references added */
+UNIV_INTERN
+ulint
+btr_blob_dbg_add_rec(
+/*=================*/
+	const rec_t*	rec,	/*!< in: record */
+	dict_index_t*	index,	/*!< in/out: index */
+	const ulint*	offsets,/*!< in: offsets */
+	const char*	ctx)	/*!< in: context (for logging) */
+{
+	ulint		count	= 0;
+	ulint		i;
+	btr_blob_dbg_t	b;
+	ibool		del;
+
+	ut_ad(rec_offs_validate(rec, index, offsets));
+
+	if (!rec_offs_any_extern(offsets)) {
+		return(0);
+	}
+
+	b.ref_page_no = page_get_page_no(page_align(rec));
+	b.ref_heap_no = page_rec_get_heap_no(rec);
+	del = (rec_get_deleted_flag(rec, rec_offs_comp(offsets)) != 0);
+
+	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
+		if (rec_offs_nth_extern(offsets, i)) {
+			ulint		len;
+			const byte*	field_ref = rec_get_nth_field(
+				rec, offsets, i, &len);
+
+			ut_a(len != UNIV_SQL_NULL);
+			ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
+			field_ref += len - BTR_EXTERN_FIELD_REF_SIZE;
+
+			if (!memcmp(field_ref, field_ref_zero,
+				    BTR_EXTERN_FIELD_REF_SIZE)) {
+				/* the column has not been stored yet */
+				continue;
+			}
+
+			b.ref_field_no = i;
+			b.blob_page_no = mach_read_from_4(
+				field_ref + BTR_EXTERN_PAGE_NO);
+			ut_a(b.ref_field_no >= index->n_uniq);
+			b.always_owner = b.owner
+				= !(field_ref[BTR_EXTERN_LEN]
+				    & BTR_EXTERN_OWNER_FLAG);
+			b.del = del;
+
+			btr_blob_dbg_rbt_insert(index, &b, ctx);
+			count++;
+		}
+	}
+
+	return(count);
+}
+
+/**************************************************************//**
+Display the references to off-page columns.
+This function is to be called from a debugger,
+for example when a breakpoint on ut_dbg_assertion_failed is hit. */
+UNIV_INTERN
+void
+btr_blob_dbg_print(
+/*===============*/
+	const dict_index_t*	index)	/*!< in: index tree */
+{
+	const ib_rbt_node_t*	node;
+
+	if (!index->blobs) {
+		return;
+	}
+
+	/* We intentionally do not acquire index->blobs_mutex here.
+	This function is to be called from a debugger, and the caller
+	should make sure that the index->blobs_mutex is held. */
+
+	for (node = rbt_first(index->blobs);
+	     node != NULL; node = rbt_next(index->blobs, node)) {
+		const btr_blob_dbg_t*	b
+			= rbt_value(btr_blob_dbg_t, node);
+		fprintf(stderr, "%u:%u:%u->%u%s%s%s\n",
+			b->ref_page_no, b->ref_heap_no, b->ref_field_no,
+			b->blob_page_no,
+			b->owner ? "" : "(disowned)",
+			b->always_owner ? "" : "(has disowned)",
+			b->del ? "(deleted)" : "");
+	}
+}
+
+/**************************************************************//**
+Remove from index->blobs any references to off-page columns from a record.
+@return number of references removed */
+UNIV_INTERN
+ulint
+btr_blob_dbg_remove_rec(
+/*====================*/
+	const rec_t*	rec,	/*!< in: record */
+	dict_index_t*	index,	/*!< in/out: index */
+	const ulint*	offsets,/*!< in: offsets */
+	const char*	ctx)	/*!< in: context (for logging) */
+{
+	ulint		i;
+	ulint		count	= 0;
+	btr_blob_dbg_t	b;
+
+	ut_ad(rec_offs_validate(rec, index, offsets));
+
+	if (!rec_offs_any_extern(offsets)) {
+		return(0);
+	}
+
+	b.ref_page_no = page_get_page_no(page_align(rec));
+	b.ref_heap_no = page_rec_get_heap_no(rec);
+
+	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
+		if (rec_offs_nth_extern(offsets, i)) {
+			ulint		len;
+			const byte*	field_ref = rec_get_nth_field(
+				rec, offsets, i, &len);
+
+			ut_a(len != UNIV_SQL_NULL);
+			ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
+			field_ref += len - BTR_EXTERN_FIELD_REF_SIZE;
+
+			b.ref_field_no = i;
+			b.blob_page_no = mach_read_from_4(
+				field_ref + BTR_EXTERN_PAGE_NO);
+
+			switch (b.blob_page_no) {
+			case 0:
+				/* The column has not been stored yet.
+				The BLOB pointer must be all zero.
+				There cannot be a BLOB starting at
+				page 0, because page 0 is reserved for
+				the tablespace header. */
+				ut_a(!memcmp(field_ref, field_ref_zero,
+					     BTR_EXTERN_FIELD_REF_SIZE));
+				/* fall through */
+			case FIL_NULL:
+				/* the column has been freed already */
+				continue;
+			}
+
+			btr_blob_dbg_rbt_delete(index, &b, ctx);
+			count++;
+		}
+	}
+
+	return(count);
+}
+
+/**************************************************************//**
+Check that there are no references to off-page columns from or to
+the given page. Invoked when freeing or clearing a page.
+@return TRUE when no orphan references exist */
+UNIV_INTERN
+ibool
+btr_blob_dbg_is_empty(
+/*==================*/
+	dict_index_t*	index,		/*!< in: index */
+	ulint		page_no)	/*!< in: page number */
+{
+	const ib_rbt_node_t*	node;
+	ibool			success	= TRUE;
+
+	if (!index->blobs) {
+		return(success);
+	}
+
+	mutex_enter(&index->blobs_mutex);
+
+	for (node = rbt_first(index->blobs);
+	     node != NULL; node = rbt_next(index->blobs, node)) {
+		const btr_blob_dbg_t*	b
+			= rbt_value(btr_blob_dbg_t, node);
+
+		if (b->ref_page_no != page_no && b->blob_page_no != page_no) {
+			continue;
+		}
+
+		fprintf(stderr,
+			"InnoDB: orphan BLOB ref%s%s%s %u:%u:%u->%u\n",
+			b->owner ? "" : "(disowned)",
+			b->always_owner ? "" : "(has disowned)",
+			b->del ? "(deleted)" : "",
+			b->ref_page_no, b->ref_heap_no, b->ref_field_no,
+			b->blob_page_no);
+
+		if (b->blob_page_no != page_no || b->owner || !b->del) {
+			success = FALSE;
+		}
+	}
+
+	mutex_exit(&index->blobs_mutex);
+	return(success);
+}
+
+/**************************************************************//**
+Count and process all references to off-page columns on a page.
+@return number of references processed */
+UNIV_INTERN
+ulint
+btr_blob_dbg_op(
+/*============*/
+	const page_t*		page,	/*!< in: B-tree leaf page */
+	const rec_t*		rec,	/*!< in: record to start from
+					(NULL to process the whole page) */
+	dict_index_t*		index,	/*!< in/out: index */
+	const char*		ctx,	/*!< in: context (for logging) */
+	const btr_blob_dbg_op_f	op)	/*!< in: operation on records */
+{
+	ulint		count	= 0;
+	mem_heap_t*	heap	= NULL;
+	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
+	ulint*		offsets	= offsets_;
+	rec_offs_init(offsets_);
+
+	ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX);
+	ut_a(!rec || page_align(rec) == page);
+
+	if (!index->blobs || !page_is_leaf(page)
+	    || !dict_index_is_clust(index)) {
+		return(0);
+	}
+
+	if (rec == NULL) {
+		rec = page_get_infimum_rec(page);
+	}
+
+	do {
+		offsets = rec_get_offsets(rec, index, offsets,
+					  ULINT_UNDEFINED, &heap);
+		count += op(rec, index, offsets, ctx);
+		rec = page_rec_get_next_const(rec);
+	} while (!page_rec_is_supremum(rec));
+
+	if (UNIV_LIKELY_NULL(heap)) {
+		mem_heap_free(heap);
+	}
+
+	return(count);
+}
+
+/**************************************************************//**
+Count and add to index->blobs any references to off-page columns
+from records on a page.
+@return number of references added */
+UNIV_INTERN
+ulint
+btr_blob_dbg_add(
+/*=============*/
+	const page_t*	page,	/*!< in: rewritten page */
+	dict_index_t*	index,	/*!< in/out: index */
+	const char*	ctx)	/*!< in: context (for logging) */
+{
+	btr_blob_dbg_assert_empty(index, page_get_page_no(page));
+
+	return(btr_blob_dbg_op(page, NULL, index, ctx, btr_blob_dbg_add_rec));
+}
+
+/**************************************************************//**
+Count and remove from index->blobs any references to off-page columns
+from records on a page.
+Used when reorganizing a page, before copying the records.
+@return number of references removed */
+UNIV_INTERN
+ulint
+btr_blob_dbg_remove(
+/*================*/
+	const page_t*	page,	/*!< in: b-tree page */
+	dict_index_t*	index,	/*!< in/out: index */
+	const char*	ctx)	/*!< in: context (for logging) */
+{
+	ulint	count;
+
+	count = btr_blob_dbg_op(page, NULL, index, ctx,
+				btr_blob_dbg_remove_rec);
+
+	/* Check that no references exist. */
+	btr_blob_dbg_assert_empty(index, page_get_page_no(page));
+
+	return(count);
+}
+
+/**************************************************************//**
+Restore in index->blobs any references to off-page columns.
+Used when page reorganize fails due to compressed page overflow. */
+UNIV_INTERN
+void
+btr_blob_dbg_restore(
+/*=================*/
+	const page_t*	npage,	/*!< in: page that failed to compress  */
+	const page_t*	page,	/*!< in: copy of original page */
+	dict_index_t*	index,	/*!< in/out: index */
+	const char*	ctx)	/*!< in: context (for logging) */
+{
+	ulint	removed;
+	ulint	added;
+
+	ut_a(page_get_page_no(npage) == page_get_page_no(page));
+	ut_a(page_get_space_id(npage) == page_get_space_id(page));
+
+	removed = btr_blob_dbg_remove(npage, index, ctx);
+	added = btr_blob_dbg_add(page, index, ctx);
+	ut_a(added == removed);
+}
+
+/**************************************************************//**
+Modify the 'deleted' flag of a record. */
+UNIV_INTERN
+void
+btr_blob_dbg_set_deleted_flag(
+/*==========================*/
+	const rec_t*		rec,	/*!< in: record */
+	dict_index_t*		index,	/*!< in/out: index */
+	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
+	ibool			del)	/*!< in: TRUE=deleted, FALSE=exists */
+{
+	const ib_rbt_node_t*	node;
+	btr_blob_dbg_t		b;
+	btr_blob_dbg_t*		c;
+	ulint			i;
+
+	ut_ad(rec_offs_validate(rec, index, offsets));
+	ut_a(dict_index_is_clust(index));
+	ut_a(del == !!del);/* must be FALSE==0 or TRUE==1 */
+
+	if (!rec_offs_any_extern(offsets) || !index->blobs) {
+
+		return;
+	}
+
+	b.ref_page_no = page_get_page_no(page_align(rec));
+	b.ref_heap_no = page_rec_get_heap_no(rec);
+
+	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
+		if (rec_offs_nth_extern(offsets, i)) {
+			ulint		len;
+			const byte*	field_ref = rec_get_nth_field(
+				rec, offsets, i, &len);
+
+			ut_a(len != UNIV_SQL_NULL);
+			ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
+			field_ref += len - BTR_EXTERN_FIELD_REF_SIZE;
+
+			b.ref_field_no = i;
+			b.blob_page_no = mach_read_from_4(
+				field_ref + BTR_EXTERN_PAGE_NO);
+
+			switch (b.blob_page_no) {
+			case 0:
+				ut_a(memcmp(field_ref, field_ref_zero,
+					    BTR_EXTERN_FIELD_REF_SIZE));
+				/* page number 0 is for the
+				page allocation bitmap; fall through */
+			case FIL_NULL:
+				/* the column has been freed already */
+				ut_error;
+			}
+
+			mutex_enter(&index->blobs_mutex);
+			node = rbt_lookup(index->blobs, &b);
+			ut_a(node);
+
+			c = rbt_value(btr_blob_dbg_t, node);
+			/* The flag should be modified. */
+			c->del = del;
+			if (btr_blob_dbg_msg) {
+				b = *c;
+				mutex_exit(&index->blobs_mutex);
+				btr_blob_dbg_msg_issue("del_mk", &b, "");
+			} else {
+				mutex_exit(&index->blobs_mutex);
+			}
+		}
+	}
+}
+
+/**************************************************************//**
+Change the ownership of an off-page column. */
+UNIV_INTERN
+void
+btr_blob_dbg_owner(
+/*===============*/
+	const rec_t*		rec,	/*!< in: record */
+	dict_index_t*		index,	/*!< in/out: index */
+	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
+	ulint			i,	/*!< in: ith field in rec */
+	ibool			own)	/*!< in: TRUE=owned, FALSE=disowned */
+{
+	const ib_rbt_node_t*	node;
+	btr_blob_dbg_t		b;
+	const byte*		field_ref;
+	ulint			len;
+
+	ut_ad(rec_offs_validate(rec, index, offsets));
+	ut_a(rec_offs_nth_extern(offsets, i));
+
+	field_ref = rec_get_nth_field(rec, offsets, i, &len);
+	ut_a(len != UNIV_SQL_NULL);
+	ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
+	field_ref += len - BTR_EXTERN_FIELD_REF_SIZE;
+
+	b.ref_page_no = page_get_page_no(page_align(rec));
+	b.ref_heap_no = page_rec_get_heap_no(rec);
+	b.ref_field_no = i;
+	b.owner = !(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG);
+	b.blob_page_no = mach_read_from_4(field_ref + BTR_EXTERN_PAGE_NO);
+
+	ut_a(b.owner == own);
+
+	mutex_enter(&index->blobs_mutex);
+	node = rbt_lookup(index->blobs, &b);
+	/* row_ins_clust_index_entry_by_modify() invokes
+	btr_cur_unmark_extern_fields() also for the newly inserted
+	references, which are all zero bytes until the columns are stored.
+	The node lookup must fail if and only if that is the case. */
+	ut_a(!memcmp(field_ref, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE)
+	     == !node);
+
+	if (node) {
+		btr_blob_dbg_t*	c = rbt_value(btr_blob_dbg_t, node);
+		/* Some code sets ownership from TRUE to TRUE.
+		We do not allow changing ownership from FALSE to FALSE. */
+		ut_a(own || c->owner);
+
+		c->owner = own;
+		if (!own) {
+			c->always_owner = FALSE;
+		}
+	}
+
+	mutex_exit(&index->blobs_mutex);
+}
+#endif /* UNIV_BLOB_DEBUG */
+
 /*
 Latching strategy of the InnoDB B-tree
 --------------------------------------
@@ -289,7 +843,7 @@ btr_get_next_user_rec(
 /**************************************************************//**
 Creates a new index page (not the root, and also not
 used in page reorganization).  @see btr_page_empty(). */
-static
+UNIV_INTERN
 void
 btr_page_create(
 /*============*/
@@ -302,6 +856,7 @@ btr_page_create(
 	page_t*		page = buf_block_get_frame(block);
 
 	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	btr_blob_dbg_assert_empty(index, buf_block_get_page_no(block));
 
 	if (UNIV_LIKELY_NULL(page_zip)) {
 		page_create_zip(block, index, level, mtr);
@@ -501,6 +1056,7 @@ btr_page_free_low(
 	modify clock */
 
 	buf_block_modify_clock_inc(block);
+	btr_blob_dbg_assert_empty(index, buf_block_get_page_no(block));
 
 	if (dict_index_is_ibuf(index)) {
 
@@ -785,6 +1341,13 @@ btr_create(
 		block = buf_page_get(space, zip_size, page_no,
 				     RW_X_LATCH, mtr);
 	} else {
+#ifdef UNIV_BLOB_DEBUG
+		if ((type & DICT_CLUSTERED) && !index->blobs) {
+			mutex_create(&index->blobs_mutex, SYNC_ANY_LATCH);
+			index->blobs = rbt_create(sizeof(btr_blob_dbg_t),
+						  btr_blob_dbg_cmp);
+		}
+#endif /* UNIV_BLOB_DEBUG */
 		block = fseg_create(space, 0,
 				    PAGE_HEADER + PAGE_BTR_SEG_TOP, mtr);
 	}
@@ -1026,6 +1589,7 @@ btr_page_reorganize_low(
 
 	block->check_index_page_at_flush = TRUE;
 #endif /* !UNIV_HOTBACKUP */
+	btr_blob_dbg_remove(page, index, "btr_page_reorganize");
 
 	/* Recreate the page: note that global data on page (possible
 	segment headers, next page-field, etc.) is preserved intact */
@@ -1054,6 +1618,8 @@ btr_page_reorganize_low(
 	    (!page_zip_compress(page_zip, page, index, NULL))) {
 
 		/* Restore the old page and exit. */
+		btr_blob_dbg_restore(page, temp_page, index,
+				     "btr_page_reorganize_compress_fail");
 
 #if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
 		/* Check that the bytes that we skip are identical. */
@@ -1168,7 +1734,7 @@ btr_parse_page_reorganize(
 #ifndef UNIV_HOTBACKUP
 /*************************************************************//**
 Empties an index page.  @see btr_page_create(). */
-static
+UNIV_INTERN
 void
 btr_page_empty(
 /*===========*/
@@ -1187,6 +1753,7 @@ btr_page_empty(
 #endif /* UNIV_ZIP_DEBUG */
 
 	btr_search_drop_page_hash_index(block);
+	btr_blob_dbg_remove(page, index, "btr_page_empty");
 
 	/* Recreate the page: note that global data on page (possible
 	segment headers, next page-field, etc.) is preserved intact */
@@ -1729,13 +2296,13 @@ btr_insert_on_non_leaf_level_func(
 /**************************************************************//**
 Attaches the halves of an index page on the appropriate level in an
 index tree. */
-static
+UNIV_INTERN
 void
 btr_attach_half_pages(
 /*==================*/
 	dict_index_t*	index,		/*!< in: the index tree */
 	buf_block_t*	block,		/*!< in/out: page to be split */
-	rec_t*		split_rec,	/*!< in: first record on upper
+	const rec_t*	split_rec,	/*!< in: first record on upper
 					half page */
 	buf_block_t*	new_block,	/*!< in/out: the new half page */
 	ulint		direction,	/*!< in: FSP_UP or FSP_DOWN */
@@ -2427,15 +2994,16 @@ btr_node_ptr_delete(
 	ut_a(err == DB_SUCCESS);
 
 	if (!compressed) {
-		btr_cur_compress_if_useful(&cursor, mtr);
+		btr_cur_compress_if_useful(&cursor, FALSE, mtr);
 	}
 }
 
 /*************************************************************//**
 If page is the only on its level, this function moves its records to the
-father page, thus reducing the tree height. */
+father page, thus reducing the tree height.
+@return father block */
 static
-void
+buf_block_t*
 btr_lift_page_up(
 /*=============*/
 	dict_index_t*	index,	/*!< in: index tree */
@@ -2527,6 +3095,7 @@ btr_lift_page_up(
 						       index);
 	}
 
+	btr_blob_dbg_remove(page, index, "btr_lift_page_up");
 	lock_update_copy_and_discard(father_block, block);
 
 	/* Go upward to root page, decrementing levels by one. */
@@ -2551,6 +3120,8 @@ btr_lift_page_up(
 	}
 	ut_ad(page_validate(father_page, index));
 	ut_ad(btr_check_node_ptr(index, father_block, mtr));
+
+	return(father_block);
 }
 
 /*************************************************************//**
@@ -2567,11 +3138,13 @@ UNIV_INTERN
 ibool
 btr_compress(
 /*=========*/
-	btr_cur_t*	cursor,	/*!< in: cursor on the page to merge or lift;
-				the page must not be empty: in record delete
-				use btr_discard_page if the page would become
-				empty */
-	mtr_t*		mtr)	/*!< in: mtr */
+	btr_cur_t*	cursor,	/*!< in/out: cursor on the page to merge
+				or lift; the page must not be empty:
+				when deleting records, use btr_discard_page()
+				if the page would become empty */
+	ibool		adjust,	/*!< in: TRUE if should adjust the
+				cursor position even if compression occurs */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
 {
 	dict_index_t*	index;
 	ulint		space;
@@ -2589,12 +3162,14 @@ btr_compress(
 	ulint*		offsets;
 	ulint		data_size;
 	ulint		n_recs;
+	ulint		nth_rec = 0; /* remove bogus warning */
 	ulint		max_ins_size;
 	ulint		max_ins_size_reorg;
 
 	block = btr_cur_get_block(cursor);
 	page = btr_cur_get_page(cursor);
 	index = btr_cur_get_index(cursor);
+
 	ut_a((ibool) !!page_is_comp(page) == dict_table_is_comp(index->table));
 
 	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
@@ -2615,6 +3190,10 @@ btr_compress(
 	offsets = btr_page_get_father_block(NULL, heap, index, block, mtr,
 					    &father_cursor);
 
+	if (adjust) {
+		nth_rec = page_rec_get_n_recs_before(btr_cur_get_rec(cursor));
+	}
+
 	/* Decide the page to which we try to merge and which will inherit
 	the locks */
 
@@ -2641,9 +3220,9 @@ btr_compress(
 	} else {
 		/* The page is the only one on the level, lift the records
 		to the father */
-		btr_lift_page_up(index, block, mtr);
-		mem_heap_free(heap);
-		return(TRUE);
+
+		merge_block = btr_lift_page_up(index, block, mtr);
+		goto func_exit;
 	}
 
 	n_recs = page_get_n_recs(page);
@@ -2725,6 +3304,10 @@ err_exit:
 
 		btr_node_ptr_delete(index, block, mtr);
 		lock_update_merge_left(merge_block, orig_pred, block);
+
+		if (adjust) {
+			nth_rec += page_rec_get_n_recs_before(orig_pred);
+		}
 	} else {
 		rec_t*		orig_succ;
 #ifdef UNIV_BTR_DEBUG
@@ -2788,7 +3371,7 @@ err_exit:
 		lock_update_merge_right(merge_block, orig_succ, block);
 	}
 
-	mem_heap_free(heap);
+	btr_blob_dbg_remove(page, index, "btr_compress");
 
 	if (!dict_index_is_clust(index) && page_is_leaf(merge_page)) {
 		/* Update the free bits of the B-tree page in the
@@ -2840,6 +3423,16 @@ err_exit:
 	btr_page_free(index, block, mtr);
 
 	ut_ad(btr_check_node_ptr(index, merge_block, mtr));
+func_exit:
+	mem_heap_free(heap);
+
+	if (adjust) {
+		btr_cur_position(
+			index,
+			page_rec_get_nth(merge_block->frame, nth_rec),
+			merge_block, cursor);
+	}
+
 	return(TRUE);
 }
 
@@ -3018,6 +3611,8 @@ btr_discard_page(
 				    block);
 	}
 
+	btr_blob_dbg_remove(page, index, "btr_discard_page");
+
 	/* Free the file page */
 	btr_page_free(index, block, mtr);
 
diff --git a/storage/xtradb/btr/btr0cur.c b/storage/xtradb/btr/btr0cur.c
index d16522731c4..51a2f784fbf 100644
--- a/storage/xtradb/btr/btr0cur.c
+++ b/storage/xtradb/btr/btr0cur.c
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2008, Google Inc.
 
 Portions of this file contain modifications contributed and copyrighted by
@@ -1046,6 +1046,11 @@ btr_cur_ins_lock_and_undo(
 	rec_t*		rec;
 	roll_ptr_t	roll_ptr;
 
+	if (thr && thr_get_trx(thr)->fake_changes) {
+		/* skip LOCK, UNDO */
+		return(DB_SUCCESS);
+	}
+
 	/* Check if we have to wait for a lock: enqueue an explicit lock
 	request if yes */
 
@@ -1177,7 +1182,7 @@ btr_cur_optimistic_insert(
 	}
 #endif /* UNIV_DEBUG */
 
-	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	ut_ad((thr && thr_get_trx(thr)->fake_changes) || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
 	max_size = page_get_max_insert_size_after_reorganize(page, 1);
 	leaf = page_is_leaf(page);
 
@@ -1272,6 +1277,12 @@ fail_err:
 		goto fail_err;
 	}
 
+	if (thr && thr_get_trx(thr)->fake_changes) {
+		/* skip CHANGE, LOG */
+		*big_rec = big_rec_vec;
+		return(err); /* == DB_SUCCESS */
+	}
+
 	page_cursor = btr_cur_get_page_cur(cursor);
 
 	/* Now, try the insert */
@@ -1414,10 +1425,10 @@ btr_cur_pessimistic_insert(
 
 	*big_rec = NULL;
 
-	ut_ad(mtr_memo_contains(mtr,
+	ut_ad((thr && thr_get_trx(thr)->fake_changes) || mtr_memo_contains(mtr,
 				dict_index_get_lock(btr_cur_get_index(cursor)),
 				MTR_MEMO_X_LOCK));
-	ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
+	ut_ad((thr && thr_get_trx(thr)->fake_changes) || mtr_memo_contains(mtr, btr_cur_get_block(cursor),
 				MTR_MEMO_PAGE_X_FIX));
 
 	/* Try first an optimistic insert; reset the cursor flag: we do not
@@ -1483,6 +1494,16 @@ btr_cur_pessimistic_insert(
 		}
 	}
 
+	if (thr && thr_get_trx(thr)->fake_changes) {
+		/* skip CHANGE, LOG */
+		if (n_extents > 0) {
+			fil_space_release_free_extents(index->space,
+						       n_reserved);
+		}
+		*big_rec = big_rec_vec;
+		return(DB_SUCCESS);
+	}
+
 	if (dict_index_get_page(index)
 	    == buf_block_get_page_no(btr_cur_get_block(cursor))) {
 
@@ -1539,6 +1560,11 @@ btr_cur_upd_lock_and_undo(
 
 	ut_ad(cursor && update && thr && roll_ptr);
 
+	if (thr && thr_get_trx(thr)->fake_changes) {
+		/* skip LOCK, UNDO */
+		return(DB_SUCCESS);
+	}
+
 	rec = btr_cur_get_rec(cursor);
 	index = cursor->index;
 
@@ -1837,6 +1863,14 @@ btr_cur_update_in_place(
 		return(err);
 	}
 
+	if (trx->fake_changes) {
+		/* skip CHANGE, LOG */
+		if (UNIV_LIKELY_NULL(heap)) {
+			mem_heap_free(heap);
+		}
+		return(err); /* == DB_SUCCESS */
+	}
+
 	if (block->is_hashed) {
 		/* The function row_upd_changes_ord_field_binary works only
 		if the update vector was built for a clustered index, we must
@@ -1929,7 +1963,6 @@ btr_cur_optimistic_update(
 	ulint		old_rec_size;
 	dtuple_t*	new_entry;
 	roll_ptr_t	roll_ptr;
-	trx_t*		trx;
 	mem_heap_t*	heap;
 	ulint		i;
 	ulint		n_ext;
@@ -1940,12 +1973,16 @@ btr_cur_optimistic_update(
 	rec = btr_cur_get_rec(cursor);
 	index = cursor->index;
 	ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
-	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	ut_ad((thr && thr_get_trx(thr)->fake_changes) || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
 	/* The insert buffer tree should never be updated in place. */
 	ut_ad(!dict_index_is_ibuf(index));
 
 	heap = mem_heap_create(1024);
 	offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+	ut_a(!rec_offs_any_null_extern(rec, offsets)
+	     || trx_is_recv(thr_get_trx(thr)));
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
 
 #ifdef UNIV_DEBUG
 	if (btr_cur_print_record_ops && thr) {
@@ -2050,6 +2087,11 @@ any_extern:
 		goto err_exit;
 	}
 
+	if (thr && thr_get_trx(thr)->fake_changes) {
+		/* skip CHANGE, LOG */
+		goto err_exit; /* == DB_SUCCESS */
+	}
+
 	/* Ok, we may do the replacement. Store on the page infimum the
 	explicit locks on rec, before deleting rec (see the comment in
 	btr_cur_pessimistic_update). */
@@ -2068,13 +2110,11 @@ any_extern:
 
 	page_cur_move_to_prev(page_cursor);
 
-	trx = thr_get_trx(thr);
-
 	if (!(flags & BTR_KEEP_SYS_FLAG)) {
 		row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
 					      roll_ptr);
 		row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
-					      trx->id);
+					      thr_get_trx(thr)->id);
 	}
 
 	/* There are no externally stored columns in new_entry */
@@ -2160,7 +2200,9 @@ btr_cur_pessimistic_update(
 /*=======================*/
 	ulint		flags,	/*!< in: undo logging, locking, and rollback
 				flags */
-	btr_cur_t*	cursor,	/*!< in: cursor on the record to update */
+	btr_cur_t*	cursor,	/*!< in/out: cursor on the record to update;
+				cursor may become invalid if *big_rec == NULL
+				|| !(flags & BTR_KEEP_POS_FLAG) */
 	mem_heap_t**	heap,	/*!< in/out: pointer to memory heap, or NULL */
 	big_rec_t**	big_rec,/*!< out: big rec vector whose fields have to
 				be stored externally by the caller, or NULL */
@@ -2200,9 +2242,9 @@ btr_cur_pessimistic_update(
 	rec = btr_cur_get_rec(cursor);
 	index = cursor->index;
 
-	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
+	ut_ad((thr && thr_get_trx(thr)->fake_changes) || mtr_memo_contains(mtr, dict_index_get_lock(index),
 				MTR_MEMO_X_LOCK));
-	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	ut_ad((thr && thr_get_trx(thr)->fake_changes) || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
 #ifdef UNIV_ZIP_DEBUG
 	ut_a(!page_zip || page_zip_validate(page_zip, page));
 #endif /* UNIV_ZIP_DEBUG */
@@ -2290,6 +2332,9 @@ btr_cur_pessimistic_update(
 
 		ut_ad(big_rec_vec == NULL);
 
+		/* fake_changes should not cause undo, so this is never reached */
+		ut_ad(!(trx->fake_changes));
+
 		btr_rec_free_updated_extern_fields(
 			index, rec, page_zip, offsets, update,
 			trx_is_recv(trx) ? RB_RECOVERY : RB_NORMAL, mtr);
@@ -2299,7 +2344,7 @@ btr_cur_pessimistic_update(
 	record to be inserted: we have to remember which fields were such */
 
 	ut_ad(!page_is_comp(page) || !rec_get_node_ptr_flag(rec));
-	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, heap);
+	ut_ad(rec_offs_validate(rec, index, offsets));
 	n_ext += btr_push_update_extern_fields(new_entry, update, *heap);
 
 	if (UNIV_LIKELY_NULL(page_zip)) {
@@ -2322,6 +2367,16 @@ make_external:
 			err = DB_TOO_BIG_RECORD;
 			goto return_after_reservations;
 		}
+
+		ut_ad(page_is_leaf(page));
+		ut_ad(dict_index_is_clust(index));
+		ut_ad(flags & BTR_KEEP_POS_FLAG);
+	}
+
+	if (trx->fake_changes) {
+		/* skip CHANGE, LOG */
+		err = DB_SUCCESS;
+		goto return_after_reservations;
 	}
 
 	/* Store state of explicit locks on rec on the page infimum record,
@@ -2349,6 +2404,8 @@ make_external:
 	rec = btr_cur_insert_if_possible(cursor, new_entry, n_ext, mtr);
 
 	if (rec) {
+		page_cursor->rec = rec;
+
 		lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
 						   rec, block);
 
@@ -2362,7 +2419,10 @@ make_external:
 						     rec, index, offsets, mtr);
 		}
 
-		btr_cur_compress_if_useful(cursor, mtr);
+		btr_cur_compress_if_useful(
+			cursor,
+			big_rec_vec != NULL && (flags & BTR_KEEP_POS_FLAG),
+			mtr);
 
 		if (page_zip && !dict_index_is_clust(index)
 		    && page_is_leaf(page)) {
@@ -2382,6 +2442,21 @@ make_external:
 		}
 	}
 
+	if (big_rec_vec) {
+		ut_ad(page_is_leaf(page));
+		ut_ad(dict_index_is_clust(index));
+		ut_ad(flags & BTR_KEEP_POS_FLAG);
+
+		/* btr_page_split_and_insert() in
+		btr_cur_pessimistic_insert() invokes
+		mtr_memo_release(mtr, index->lock, MTR_MEMO_X_LOCK).
+		We must keep the index->lock when we created a
+		big_rec, so that row_upd_clust_rec() can store the
+		big_rec in the same mini-transaction. */
+
+		mtr_x_lock(dict_index_get_lock(index), mtr);
+	}
+
 	/* Was the record to be updated positioned as the first user
 	record on its page? */
 	was_first = page_cur_is_before_first(page_cursor);
@@ -2397,6 +2472,7 @@ make_external:
 	ut_a(rec);
 	ut_a(err == DB_SUCCESS);
 	ut_a(dummy_big_rec == NULL);
+	page_cursor->rec = rec;
 
 	if (dict_index_is_sec_or_ibuf(index)) {
 		/* Update PAGE_MAX_TRX_ID in the index page header.
@@ -2455,6 +2531,39 @@ return_after_reservations:
 	return(err);
 }
 
+/**************************************************************//**
+Commits and restarts a mini-transaction so that it will retain an
+x-lock on index->lock and the cursor page. */
+UNIV_INTERN
+void
+btr_cur_mtr_commit_and_start(
+/*=========================*/
+	btr_cur_t*	cursor,	/*!< in: cursor */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+{
+	buf_block_t*	block;
+
+	block = btr_cur_get_block(cursor);
+
+	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
+				MTR_MEMO_X_LOCK));
+	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	/* Keep the locks across the mtr_commit(mtr). */
+	rw_lock_x_lock(dict_index_get_lock(cursor->index));
+	rw_lock_x_lock(&block->lock);
+	mutex_enter(&block->mutex);
+	buf_block_buf_fix_inc(block, __FILE__, __LINE__);
+	mutex_exit(&block->mutex);
+	/* Write out the redo log. */
+	mtr_commit(mtr);
+	mtr_start(mtr);
+	/* Reassociate the locks with the mini-transaction.
+	They will be released on mtr_commit(mtr). */
+	mtr_memo_push(mtr, dict_index_get_lock(cursor->index),
+		      MTR_MEMO_X_LOCK);
+	mtr_memo_push(mtr, block, MTR_MEMO_PAGE_X_FIX);
+}
+
 /*==================== B-TREE DELETE MARK AND UNMARK ===============*/
 
 /****************************************************************//**
@@ -2625,6 +2734,11 @@ btr_cur_del_mark_set_clust_rec(
 	ut_ad(dict_index_is_clust(index));
 	ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
 
+	if (thr && thr_get_trx(thr)->fake_changes) {
+		/* skip LOCK, UNDO, CHANGE, LOG */
+		return(DB_SUCCESS);
+	}
+
 	err = lock_clust_rec_modify_check_and_lock(flags, block,
 						   rec, index, offsets, thr);
 
@@ -2647,6 +2761,7 @@ btr_cur_del_mark_set_clust_rec(
 
 	page_zip = buf_block_get_page_zip(block);
 
+	btr_blob_dbg_set_deleted_flag(rec, index, offsets, val);
 	btr_rec_set_deleted_flag(rec, page_zip, val);
 
 	trx = thr_get_trx(thr);
@@ -2761,6 +2876,11 @@ btr_cur_del_mark_set_sec_rec(
 	rec_t*		rec;
 	ulint		err;
 
+	if (thr && thr_get_trx(thr)->fake_changes) {
+		/* skip LOCK, CHANGE, LOG */
+		return(DB_SUCCESS);
+	}
+
 	block = btr_cur_get_block(cursor);
 	rec = btr_cur_get_rec(cursor);
 
@@ -2833,10 +2953,12 @@ UNIV_INTERN
 ibool
 btr_cur_compress_if_useful(
 /*=======================*/
-	btr_cur_t*	cursor,	/*!< in: cursor on the page to compress;
-				cursor does not stay valid if compression
-				occurs */
-	mtr_t*		mtr)	/*!< in: mtr */
+	btr_cur_t*	cursor,	/*!< in/out: cursor on the page to compress;
+				cursor does not stay valid if !adjust and
+				compression occurs */
+	ibool		adjust,	/*!< in: TRUE if should adjust the
+				cursor position even if compression occurs */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
 {
 	ut_ad(mtr_memo_contains(mtr,
 				dict_index_get_lock(btr_cur_get_index(cursor)),
@@ -2845,7 +2967,7 @@ btr_cur_compress_if_useful(
 				MTR_MEMO_PAGE_X_FIX));
 
 	return(btr_cur_compress_recommendation(cursor, mtr)
-	       && btr_compress(cursor, mtr));
+	       && btr_compress(cursor, adjust, mtr));
 }
 
 /*******************************************************//**
@@ -3092,7 +3214,7 @@ return_after_reservations:
 	mem_heap_free(heap);
 
 	if (ret == FALSE) {
-		ret = btr_cur_compress_if_useful(cursor, mtr);
+		ret = btr_cur_compress_if_useful(cursor, FALSE, mtr);
 	}
 
 	if (n_extents > 0) {
@@ -3116,6 +3238,7 @@ btr_cur_add_path_info(
 {
 	btr_path_t*	slot;
 	rec_t*		rec;
+	page_t*		page;
 
 	ut_a(cursor->path_arr);
 
@@ -3138,8 +3261,155 @@ btr_cur_add_path_info(
 
 	slot = cursor->path_arr + (root_height - height);
 
+	page = page_align(rec);
+
 	slot->nth_rec = page_rec_get_n_recs_before(rec);
-	slot->n_recs = page_get_n_recs(page_align(rec));
+	slot->n_recs = page_get_n_recs(page);
+	slot->page_no = page_get_page_no(page);
+	slot->page_level = btr_page_get_level_low(page);
+}
+
+/*******************************************************************//**
+Estimate the number of rows between slot1 and slot2 for any level on a
+B-tree. This function starts from slot1->page and reads a few pages to
+the right, counting their records. If we reach slot2->page quickly then
+we know exactly how many records there are between slot1 and slot2 and
+we set is_n_rows_exact to TRUE. If we cannot reach slot2->page quickly
+then we calculate the average number of records in the pages scanned
+so far and assume that all pages that we did not scan up to slot2->page
+contain the same number of records, then we multiply that average by
+the number of pages between slot1->page and slot2->page (which is
+n_rows_on_prev_level). In this case we set is_n_rows_exact to FALSE.
+@return	number of rows (exact or estimated) */
+static
+ib_int64_t
+btr_estimate_n_rows_in_range_on_level(
+/*==================================*/
+	dict_index_t*	index,			/*!< in: index */
+	btr_path_t*	slot1,			/*!< in: left border */
+	btr_path_t*	slot2,			/*!< in: right border */
+	ib_int64_t	n_rows_on_prev_level,	/*!< in: number of rows
+						on the previous level for the
+						same descend paths; used to
+						determine the number of pages
+						on this level */
+	ibool*		is_n_rows_exact)	/*!< out: TRUE if the returned
+						value is exact i.e. not an
+						estimation */
+{
+	ulint		space;
+	ib_int64_t	n_rows;
+	ulint		n_pages_read;
+	ulint		page_no;
+	ulint		zip_size;
+	ulint		level;
+
+	space = dict_index_get_space(index);
+
+	n_rows = 0;
+	n_pages_read = 0;
+
+	/* Assume by default that we will scan all pages between
+	slot1->page_no and slot2->page_no */
+	*is_n_rows_exact = TRUE;
+
+	/* add records from slot1->page_no which are to the right of
+	the record which serves as a left border of the range, if any */
+	if (slot1->nth_rec < slot1->n_recs) {
+		n_rows += slot1->n_recs - slot1->nth_rec;
+	}
+
+	/* add records from slot2->page_no which are to the left of
+	the record which serves as a right border of the range, if any */
+	if (slot2->nth_rec > 1) {
+		n_rows += slot2->nth_rec - 1;
+	}
+
+	/* count the records in the pages between slot1->page_no and
+	slot2->page_no (non inclusive), if any */
+
+	zip_size = fil_space_get_zip_size(space);
+
+	/* Do not read more than this number of pages in order not to hurt
+	performance with this code which is just an estimation. If we read
+	this many pages before reaching slot2->page_no then we estimate the
+	average from the pages scanned so far */
+	#define N_PAGES_READ_LIMIT	10
+
+	page_no = slot1->page_no;
+	level = slot1->page_level;
+
+	do {
+		mtr_t		mtr;
+		page_t*		page;
+		buf_block_t*	block;
+
+		mtr_start(&mtr);
+
+		/* fetch the page */
+		block = buf_page_get(space, zip_size, page_no, RW_S_LATCH,
+				     &mtr);
+
+		page = buf_block_get_frame(block);
+
+		/* It is possible that the tree has been reorganized in the
+		meantime and this is a different page. If this happens the
+		calculated estimate will be bogus, which is not fatal as
+		this is only an estimate. We are sure that a page with
+		page_no exists because InnoDB never frees pages, only
+		reuses them. */
+		if (fil_page_get_type(page) != FIL_PAGE_INDEX
+		    || ut_dulint_cmp(btr_page_get_index_id(page), index->id)
+		    || btr_page_get_level_low(page) != level) {
+
+			/* The page got reused for something else */
+			goto inexact;
+		}
+
+		n_pages_read++;
+
+		if (page_no != slot1->page_no) {
+			/* Do not count the records on slot1->page_no,
+			we already counted them before this loop. */
+			n_rows += page_get_n_recs(page);
+		}
+
+		page_no = btr_page_get_next(page, &mtr);
+
+		mtr_commit(&mtr);
+
+		if (n_pages_read == N_PAGES_READ_LIMIT
+		    || page_no == FIL_NULL) {
+			/* Either we read too many pages or
+			we reached the end of the level without passing
+			through slot2->page_no, the tree must have changed
+			in the meantime */
+			goto inexact;
+		}
+
+	} while (page_no != slot2->page_no);
+
+	return(n_rows);
+
+inexact:
+
+	*is_n_rows_exact = FALSE;
+
+	/* We stopped before reaching slot2->page */
+
+	if (n_pages_read > 0) {
+		/* The number of pages on this level is
+		n_rows_on_prev_level, multiply it by the
+		average number of recs per page so far */
+		n_rows = n_rows_on_prev_level
+			* n_rows / n_pages_read;
+	} else {
+		/* The tree changed before we could even
+		start with slot1->page_no */
+		n_rows = 10;
+	}
+
+	return(n_rows);
 }
 
 /*******************************************************************//**
@@ -3164,6 +3434,7 @@ btr_estimate_n_rows_in_range(
 	ibool		diverged_lot;
 	ulint		divergence_level;
 	ib_int64_t	n_rows;
+	ibool		is_n_rows_exact;
 	ulint		i;
 	mtr_t		mtr;
 
@@ -3206,6 +3477,7 @@ btr_estimate_n_rows_in_range(
 	/* We have the path information for the range in path1 and path2 */
 
 	n_rows = 1;
+	is_n_rows_exact = TRUE;
 	diverged = FALSE;	    /* This becomes true when the path is not
 				    the same any more */
 	diverged_lot = FALSE;	    /* This becomes true when the paths are
@@ -3221,7 +3493,7 @@ btr_estimate_n_rows_in_range(
 		if (slot1->nth_rec == ULINT_UNDEFINED
 		    || slot2->nth_rec == ULINT_UNDEFINED) {
 
-			if (i > divergence_level + 1) {
+			if (i > divergence_level + 1 && !is_n_rows_exact) {
 				/* In trees whose height is > 1 our algorithm
 				tends to underestimate: multiply the estimate
 				by 2: */
@@ -3233,7 +3505,9 @@ btr_estimate_n_rows_in_range(
 			to over 1 / 2 of the estimated rows in the whole
 			table */
 
-			if (n_rows > index->table->stat_n_rows / 2) {
+			if (n_rows > index->table->stat_n_rows / 2
+			    && !is_n_rows_exact) {
+
 				n_rows = index->table->stat_n_rows / 2;
 
 				/* If there are just 0 or 1 rows in the table,
@@ -3259,10 +3533,15 @@ btr_estimate_n_rows_in_range(
 					divergence_level = i;
 				}
 			} else {
-				/* Maybe the tree has changed between
-				searches */
-
-				return(10);
+				/* It is possible that
+				slot1->nth_rec >= slot2->nth_rec
+				if, for example, we have a single page
+				tree which contains (inf, 5, 6, supr)
+				and we select where x > 20 and x < 30;
+				in this case slot1->nth_rec will point
+				to the supr record and slot2->nth_rec
+				will point to 6 */
+				n_rows = 0;
 			}
 
 		} else if (diverged && !diverged_lot) {
@@ -3286,8 +3565,9 @@ btr_estimate_n_rows_in_range(
 			}
 		} else if (diverged_lot) {
 
-			n_rows = (n_rows * (slot1->n_recs + slot2->n_recs))
-				/ 2;
+			n_rows = btr_estimate_n_rows_in_range_on_level(
+				index, slot1, slot2, n_rows,
+				&is_n_rows_exact);
 		}
 	}
 }
@@ -3679,6 +3959,8 @@ btr_cur_set_ownership_of_extern_field(
 	} else {
 		mach_write_to_1(data + local_len + BTR_EXTERN_LEN, byte_val);
 	}
+
+	btr_blob_dbg_owner(rec, index, offsets, i, val);
 }
 
 /*******************************************************************//**
@@ -3883,7 +4165,7 @@ btr_blob_free(
 	    && buf_block_get_space(block) == space
 	    && buf_block_get_page_no(block) == page_no) {
 
-		if (buf_LRU_free_block(&block->page, all, TRUE) != BUF_LRU_FREED
+		if (!buf_LRU_free_block(&block->page, all, TRUE)
 		    && all && block->page.zip.data
 		    /* Now, buf_LRU_free_block() may release mutex temporarily */
 		    && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
@@ -4184,6 +4466,11 @@ btr_store_big_rec_extern_fields_func(
 				}
 
 				if (prev_page_no == FIL_NULL) {
+					btr_blob_dbg_add_blob(
+						rec, big_rec_vec->fields[i]
+						.field_no, page_no, index,
+						"store");
+
 					mach_write_to_4(field_ref
 							+ BTR_EXTERN_SPACE_ID,
 							space_id);
@@ -4259,6 +4546,11 @@ next_zip_page:
 						 MLOG_4BYTES, &mtr);
 
 				if (prev_page_no == FIL_NULL) {
+					btr_blob_dbg_add_blob(
+						rec, big_rec_vec->fields[i]
+						.field_no, page_no, index,
+						"store");
+
 					mlog_write_ulint(field_ref
 							 + BTR_EXTERN_SPACE_ID,
 							 space_id,
@@ -4427,6 +4719,37 @@ btr_free_externally_stored_field(
 		rec_zip_size = 0;
 	}
 
+#ifdef UNIV_BLOB_DEBUG
+	if (!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG)
+	    && !((field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_INHERITED_FLAG)
+		 && (rb_ctx == RB_NORMAL || rb_ctx == RB_RECOVERY))) {
+		/* This off-page column will be freed.
+		Check that no references remain. */
+
+		btr_blob_dbg_t	b;
+
+		b.blob_page_no = mach_read_from_4(
+			field_ref + BTR_EXTERN_PAGE_NO);
+
+		if (rec) {
+			/* Remove the reference from the record to the
+			BLOB. If the BLOB were not freed, the
+			reference would be removed when the record is
+			removed. Freeing the BLOB will overwrite the
+			BTR_EXTERN_PAGE_NO in the field_ref of the
+			record with FIL_NULL, which would make the
+			btr_blob_dbg information inconsistent with the
+			record. */
+			b.ref_page_no = page_get_page_no(page_align(rec));
+			b.ref_heap_no = page_rec_get_heap_no(rec);
+			b.ref_field_no = i;
+			btr_blob_dbg_rbt_delete(index, &b, "free");
+		}
+
+		btr_blob_dbg_assert_empty(index, b.blob_page_no);
+	}
+#endif /* UNIV_BLOB_DEBUG */
+
 	for (;;) {
 #ifdef UNIV_SYNC_DEBUG
 		buf_block_t*	rec_block;
@@ -4673,27 +4996,45 @@ btr_copy_blob_prefix(
 
 /*******************************************************************//**
 Copies the prefix of a compressed BLOB.  The clustered index record
-that points to this BLOB must be protected by a lock or a page latch. */
+that points to this BLOB must be protected by a lock or a page latch.
+@return	number of bytes written to buf */
 static
-void
+ulint
 btr_copy_zblob_prefix(
 /*==================*/
-	z_stream*	d_stream,/*!< in/out: the decompressing stream */
+	byte*		buf,	/*!< out: the externally stored part of
+				the field, or a prefix of it */
+	ulint		len,	/*!< in: length of buf, in bytes */
 	ulint		zip_size,/*!< in: compressed BLOB page size */
 	ulint		space_id,/*!< in: space id of the BLOB pages */
 	ulint		page_no,/*!< in: page number of the first BLOB page */
 	ulint		offset)	/*!< in: offset on the first BLOB page */
 {
-	ulint	page_type = FIL_PAGE_TYPE_ZBLOB;
+	ulint		page_type = FIL_PAGE_TYPE_ZBLOB;
+	mem_heap_t*	heap;
+	int		err;
+	z_stream	d_stream;
+
+	d_stream.next_out = buf;
+	d_stream.avail_out = len;
+	d_stream.next_in = Z_NULL;
+	d_stream.avail_in = 0;
+
+	/* Zlib inflate needs 32 kilobytes for the default
+	window size, plus a few kilobytes for small objects. */
+	heap = mem_heap_create(40000);
+	page_zip_set_alloc(&d_stream, heap);
 
 	ut_ad(ut_is_2pow(zip_size));
 	ut_ad(zip_size >= PAGE_ZIP_MIN_SIZE);
 	ut_ad(zip_size <= UNIV_PAGE_SIZE);
 	ut_ad(space_id);
 
+	err = inflateInit(&d_stream);
+	ut_a(err == Z_OK);
+
 	for (;;) {
 		buf_page_t*	bpage;
-		int		err;
 		ulint		next_page_no;
 
 		/* There is no latch on bpage directly.  Instead,
@@ -4709,7 +5050,7 @@ btr_copy_zblob_prefix(
 				" compressed BLOB"
 				" page %lu space %lu\n",
 				(ulong) page_no, (ulong) space_id);
-			return;
+			goto func_exit;
 		}
 
 		if (UNIV_UNLIKELY
@@ -4735,13 +5076,13 @@ btr_copy_zblob_prefix(
 			offset += 4;
 		}
 
-		d_stream->next_in = bpage->zip.data + offset;
-		d_stream->avail_in = zip_size - offset;
+		d_stream.next_in = bpage->zip.data + offset;
+		d_stream.avail_in = zip_size - offset;
 
-		err = inflate(d_stream, Z_NO_FLUSH);
+		err = inflate(&d_stream, Z_NO_FLUSH);
 		switch (err) {
 		case Z_OK:
-			if (!d_stream->avail_out) {
+			if (!d_stream.avail_out) {
 				goto end_of_blob;
 			}
 			break;
@@ -4758,13 +5099,13 @@ inflate_error:
 				" compressed BLOB"
 				" page %lu space %lu returned %d (%s)\n",
 				(ulong) page_no, (ulong) space_id,
-				err, d_stream->msg);
+				err, d_stream.msg);
 		case Z_BUF_ERROR:
 			goto end_of_blob;
 		}
 
 		if (next_page_no == FIL_NULL) {
-			if (!d_stream->avail_in) {
+			if (!d_stream.avail_in) {
 				ut_print_timestamp(stderr);
 				fprintf(stderr,
 					"  InnoDB: unexpected end of"
@@ -4773,7 +5114,7 @@ inflate_error:
 					(ulong) page_no,
 					(ulong) space_id);
 			} else {
-				err = inflate(d_stream, Z_FINISH);
+				err = inflate(&d_stream, Z_FINISH);
 				switch (err) {
 				case Z_STREAM_END:
 				case Z_BUF_ERROR:
@@ -4785,7 +5126,7 @@ inflate_error:
 
 end_of_blob:
 			buf_page_release_zip(bpage);
-			return;
+			goto func_exit;
 		}
 
 		buf_page_release_zip(bpage);
@@ -4797,6 +5138,12 @@ end_of_blob:
 		offset = FIL_PAGE_NEXT;
 		page_type = FIL_PAGE_TYPE_ZBLOB2;
 	}
+
+func_exit:
+	inflateEnd(&d_stream);
+	mem_heap_free(heap);
+	UNIV_MEM_ASSERT_RW(buf, d_stream.total_out);
+	return(d_stream.total_out);
 }
 
 /*******************************************************************//**
@@ -4822,28 +5169,8 @@ btr_copy_externally_stored_field_prefix_low(
 	}
 
 	if (UNIV_UNLIKELY(zip_size)) {
-		int		err;
-		z_stream	d_stream;
-		mem_heap_t*	heap;
-
-		/* Zlib inflate needs 32 kilobytes for the default
-		window size, plus a few kilobytes for small objects. */
-		heap = mem_heap_create(40000);
-		page_zip_set_alloc(&d_stream, heap);
-
-		err = inflateInit(&d_stream);
-		ut_a(err == Z_OK);
-
-		d_stream.next_out = buf;
-		d_stream.avail_out = len;
-		d_stream.avail_in = 0;
-
-		btr_copy_zblob_prefix(&d_stream, zip_size,
-				      space_id, page_no, offset);
-		inflateEnd(&d_stream);
-		mem_heap_free(heap);
-		UNIV_MEM_ASSERT_RW(buf, d_stream.total_out);
-		return(d_stream.total_out);
+		return(btr_copy_zblob_prefix(buf, len, zip_size,
+					     space_id, page_no, offset));
 	} else {
 		return(btr_copy_blob_prefix(buf, len, space_id,
 					    page_no, offset));
diff --git a/storage/xtradb/btr/btr0pcur.c b/storage/xtradb/btr/btr0pcur.c
index f95a5487c94..97fe06f0f5e 100644
--- a/storage/xtradb/btr/btr0pcur.c
+++ b/storage/xtradb/btr/btr0pcur.c
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -362,33 +362,6 @@ btr_pcur_restore_position_func(
 	return(FALSE);
 }
 
-/**************************************************************//**
-If the latch mode of the cursor is BTR_LEAF_SEARCH or BTR_LEAF_MODIFY,
-releases the page latch and bufferfix reserved by the cursor.
-NOTE! In the case of BTR_LEAF_MODIFY, there should not exist changes
-made by the current mini-transaction to the data protected by the
-cursor latch, as then the latch must not be released until mtr_commit. */
-UNIV_INTERN
-void
-btr_pcur_release_leaf(
-/*==================*/
-	btr_pcur_t*	cursor, /*!< in: persistent cursor */
-	mtr_t*		mtr)	/*!< in: mtr */
-{
-	buf_block_t*	block;
-
-	ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
-	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
-	block = btr_pcur_get_block(cursor);
-
-	btr_leaf_page_release(block, cursor->latch_mode, mtr);
-
-	cursor->latch_mode = BTR_NO_LATCHES;
-
-	cursor->pos_state = BTR_PCUR_WAS_POSITIONED;
-}
-
 /*********************************************************//**
 Moves the persistent cursor to the first record on the next page. Releases the
 latch on the current page, and bufferunfixes it. Note that there must not be
diff --git a/storage/xtradb/btr/btr0sea.c b/storage/xtradb/btr/btr0sea.c
index 3b38e2799c2..6e6c533f4af 100644
--- a/storage/xtradb/btr/btr0sea.c
+++ b/storage/xtradb/btr/btr0sea.c
@@ -1373,8 +1373,8 @@ btr_search_drop_page_hash_when_freed(
 	having to fear a deadlock. */
 
 	block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH, NULL,
-				BUF_GET_IF_IN_POOL, __FILE__, __LINE__,
-				&mtr);
+				 BUF_PEEK_IF_IN_POOL, __FILE__, __LINE__,
+				 &mtr);
 	/* Because the buffer pool mutex was released by
 	buf_page_peek_if_search_hashed(), it is possible that the
 	block was removed from the buffer pool by another thread
diff --git a/storage/xtradb/buf/buf0buddy.c b/storage/xtradb/buf/buf0buddy.c
index db94b4bed24..673d6c55efc 100644
--- a/storage/xtradb/buf/buf0buddy.c
+++ b/storage/xtradb/buf/buf0buddy.c
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2006, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 2006, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -45,6 +45,14 @@ static ulint buf_buddy_n_frames;
 Protected by buf_pool_mutex. */
 UNIV_INTERN buf_buddy_stat_t buf_buddy_stat[BUF_BUDDY_SIZES_MAX + 1];
 
+/** Validate a given zip_free list. */
+#define BUF_BUDDY_LIST_VALIDATE(i)				\
+	UT_LIST_VALIDATE(zip_list, buf_page_t,			\
+			 buf_pool->zip_free[i],			\
+			 ut_ad(buf_page_get_state(		\
+				       ut_list_node_313)	\
+			       == BUF_BLOCK_ZIP_FREE))
+
 /**********************************************************************//**
 Get the offset of the buddy of a compressed page frame.
 @return	the buddy relative of page */
@@ -76,22 +84,11 @@ buf_buddy_add_to_free(
 	buf_page_t*	bpage,	/*!< in,own: block to be freed */
 	ulint		i)	/*!< in: index of buf_pool->zip_free[] */
 {
-#ifdef UNIV_DEBUG_VALGRIND
-	buf_page_t*	b  = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
-
-	if (b) UNIV_MEM_VALID(b, BUF_BUDDY_LOW << i);
-#endif /* UNIV_DEBUG_VALGRIND */
-
 	//ut_ad(buf_pool_mutex_own());
 	ut_ad(mutex_own(&zip_free_mutex));
 	ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
 	ut_ad(buf_pool->zip_free[i].start != bpage);
 	UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_free[i], bpage);
-
-#ifdef UNIV_DEBUG_VALGRIND
-	if (b) UNIV_MEM_FREE(b, BUF_BUDDY_LOW << i);
-	UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
-#endif /* UNIV_DEBUG_VALGRIND */
 }
 
 /**********************************************************************//**
@@ -103,26 +100,18 @@ buf_buddy_remove_from_free(
 	buf_page_t*	bpage,	/*!< in: block to be removed */
 	ulint		i)	/*!< in: index of buf_pool->zip_free[] */
 {
-#ifdef UNIV_DEBUG_VALGRIND
+#ifdef UNIV_DEBUG
 	buf_page_t*	prev = UT_LIST_GET_PREV(zip_list, bpage);
 	buf_page_t*	next = UT_LIST_GET_NEXT(zip_list, bpage);
 
-	if (prev) UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i);
-	if (next) UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i);
-
 	ut_ad(!prev || buf_page_get_state(prev) == BUF_BLOCK_ZIP_FREE);
 	ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
-#endif /* UNIV_DEBUG_VALGRIND */
+#endif /* UNIV_DEBUG */
 
 	//ut_ad(buf_pool_mutex_own());
 	ut_ad(mutex_own(&zip_free_mutex));
 	ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
 	UT_LIST_REMOVE(zip_list, buf_pool->zip_free[i], bpage);
-
-#ifdef UNIV_DEBUG_VALGRIND
-	if (prev) UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i);
-	if (next) UNIV_MEM_FREE(next, BUF_BUDDY_LOW << i);
-#endif /* UNIV_DEBUG_VALGRIND */
 }
 
 /**********************************************************************//**
@@ -139,17 +128,13 @@ buf_buddy_alloc_zip(
 	//ut_ad(buf_pool_mutex_own());
 	ut_ad(mutex_own(&zip_free_mutex));
 	ut_a(i < BUF_BUDDY_SIZES);
+	ut_a(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
+
+	ut_d(BUF_BUDDY_LIST_VALIDATE(i));
 
-#ifndef UNIV_DEBUG_VALGRIND
-	/* Valgrind would complain about accessing free memory. */
-	ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
-			      ut_ad(buf_page_get_state(ut_list_node_313)
-				    == BUF_BLOCK_ZIP_FREE)));
-#endif /* !UNIV_DEBUG_VALGRIND */
 	bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
 
 	if (bpage) {
-		UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
 		ut_a(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
 
 		buf_buddy_remove_from_free(bpage, i);
@@ -168,13 +153,10 @@ buf_buddy_alloc_zip(
 		}
 	}
 
-#ifdef UNIV_DEBUG
 	if (bpage) {
-		memset(bpage, ~i, BUF_BUDDY_LOW << i);
+		ut_d(memset(bpage, ~i, BUF_BUDDY_LOW << i));
+		UNIV_MEM_ALLOC(bpage, BUF_BUDDY_SIZES << i);
 	}
-#endif /* UNIV_DEBUG */
-
-	UNIV_MEM_ALLOC(bpage, BUF_BUDDY_SIZES << i);
 
 	return(bpage);
 }
@@ -266,6 +248,7 @@ buf_buddy_alloc_from(
 {
 	ulint	offs	= BUF_BUDDY_LOW << j;
 	ut_ad(j <= BUF_BUDDY_SIZES);
+	ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
 	ut_ad(j >= i);
 	ut_ad(!ut_align_offset(buf, offs));
 
@@ -279,13 +262,7 @@ buf_buddy_alloc_from(
 		bpage = (buf_page_t*) ((byte*) buf + offs);
 		ut_d(memset(bpage, j, BUF_BUDDY_LOW << j));
 		bpage->state = BUF_BLOCK_ZIP_FREE;
-#ifndef UNIV_DEBUG_VALGRIND
-		/* Valgrind would complain about accessing free memory. */
-		ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
-				      ut_ad(buf_page_get_state(
-						    ut_list_node_313)
-					    == BUF_BLOCK_ZIP_FREE)));
-#endif /* !UNIV_DEBUG_VALGRIND */
+		ut_d(BUF_BUDDY_LIST_VALIDATE(i));
 		buf_buddy_add_to_free(bpage, j);
 	}
 
@@ -295,8 +272,8 @@ buf_buddy_alloc_from(
 /**********************************************************************//**
 Allocate a block.  The thread calling this function must hold
 buf_pool_mutex and must not hold buf_pool_zip_mutex or any block->mutex.
-The buf_pool_mutex may only be released and reacquired if lru != NULL.
-@return	allocated block, possibly NULL if lru==NULL */
+The buf_pool_mutex may be released and reacquired.
+@return	allocated block, never NULL */
 UNIV_INTERN
 void*
 buf_buddy_alloc_low(
@@ -305,14 +282,15 @@ buf_buddy_alloc_low(
 			or BUF_BUDDY_SIZES */
 	ibool*	lru,	/*!< in: pointer to a variable that will be assigned
 			TRUE if storage was allocated from the LRU list
-			and buf_pool_mutex was temporarily released,
-			or NULL if the LRU list should not be used */
+			and buf_pool_mutex was temporarily released */
 	ibool	have_page_hash_mutex)
 {
 	buf_block_t*	block;
 
+	ut_ad(lru);
 	//ut_ad(buf_pool_mutex_own());
 	ut_ad(!mutex_own(&buf_pool_zip_mutex));
+	ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
 
 	if (i < BUF_BUDDY_SIZES) {
 		/* Try to allocate from the buddy system. */
@@ -335,11 +313,6 @@ buf_buddy_alloc_low(
 		goto alloc_big;
 	}
 
-	if (!lru) {
-
-		return(NULL);
-	}
-
 	/* Try replacing an uncompressed page in the buffer pool. */
 	//buf_pool_mutex_exit();
 	mutex_exit(&LRU_list_mutex);
@@ -368,76 +341,6 @@ func_exit:
 }
 
 /**********************************************************************//**
-Try to relocate the control block of a compressed page.
-@return	TRUE if relocated */
-static
-ibool
-buf_buddy_relocate_block(
-/*=====================*/
-	buf_page_t*	bpage,	/*!< in: block to relocate */
-	buf_page_t*	dpage)	/*!< in: free block to relocate to */
-{
-	buf_page_t*	b;
-
-	//ut_ad(buf_pool_mutex_own());
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX));
-#endif
-
-	switch (buf_page_get_state(bpage)) {
-	case BUF_BLOCK_ZIP_FREE:
-	case BUF_BLOCK_NOT_USED:
-	case BUF_BLOCK_READY_FOR_USE:
-	case BUF_BLOCK_FILE_PAGE:
-	case BUF_BLOCK_MEMORY:
-	case BUF_BLOCK_REMOVE_HASH:
-		/* ut_error; */ /* optimistic */
-	case BUF_BLOCK_ZIP_DIRTY:
-		/* Cannot relocate dirty pages. */
-		return(FALSE);
-
-	case BUF_BLOCK_ZIP_PAGE:
-		break;
-	}
-
-	mutex_enter(&buf_pool_zip_mutex);
-	mutex_enter(&zip_free_mutex);
-
-	if (!buf_page_can_relocate(bpage)) {
-		mutex_exit(&buf_pool_zip_mutex);
-		mutex_exit(&zip_free_mutex);
-		return(FALSE);
-	}
-
-	if (bpage != buf_page_hash_get(bpage->space, bpage->offset)) {
-		mutex_exit(&buf_pool_zip_mutex);
-		mutex_exit(&zip_free_mutex);
-		return(FALSE);
-	}
-
-	buf_relocate(bpage, dpage);
-	ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);
-
-	/* relocate buf_pool->zip_clean */
-	mutex_enter(&flush_list_mutex);
-	b = UT_LIST_GET_PREV(zip_list, dpage);
-	UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, dpage);
-
-	if (b) {
-		UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, dpage);
-	} else {
-		UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, dpage);
-	}
-	mutex_exit(&flush_list_mutex);
-
-	UNIV_MEM_INVALID(bpage, sizeof *bpage);
-
-	mutex_exit(&buf_pool_zip_mutex);
-	mutex_exit(&zip_free_mutex);
-	return(TRUE);
-}
-
-/**********************************************************************//**
 Try to relocate a block.
 @return	TRUE if relocated */
 static
@@ -452,159 +355,120 @@ buf_buddy_relocate(
 	buf_page_t*	bpage;
 	const ulint	size	= BUF_BUDDY_LOW << i;
 	ullint		usec	= ut_time_us(NULL);
+	mutex_t*	mutex;
+	ulint		space;
+	ulint		page_no;
 
 	//ut_ad(buf_pool_mutex_own());
 	ut_ad(mutex_own(&zip_free_mutex));
 	ut_ad(!mutex_own(&buf_pool_zip_mutex));
 	ut_ad(!ut_align_offset(src, size));
 	ut_ad(!ut_align_offset(dst, size));
+	ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
 	UNIV_MEM_ASSERT_W(dst, size);
 
+	if (!have_page_hash_mutex) {
+		mutex_exit(&zip_free_mutex);
+		mutex_enter(&LRU_list_mutex);
+		rw_lock_x_lock(&page_hash_latch);
+	}
+
 	/* We assume that all memory from buf_buddy_alloc()
-	is used for either compressed pages or buf_page_t
-	objects covering compressed pages. */
+	is used for compressed page frames. */
 
 	/* We look inside the allocated objects returned by
-	buf_buddy_alloc() and assume that anything of
-	PAGE_ZIP_MIN_SIZE or larger is a compressed page that contains
-	a valid space_id and page_no in the page header.  Should the
-	fields be invalid, we will be unable to relocate the block.
-	We also assume that anything that fits sizeof(buf_page_t)
-	actually is a properly initialized buf_page_t object. */
-
-	if (size >= PAGE_ZIP_MIN_SIZE) {
-		/* This is a compressed page. */
-		mutex_t*	mutex;
-		ulint		space, page_no;
+	buf_buddy_alloc() and assume that each block is a compressed
+	page that contains a valid space_id and page_no in the page
+	header. Should the fields be invalid, we will be unable to
+	relocate the block. */
+
+	/* The src block may be split into smaller blocks,
+	some of which may be free.  Thus, the
+	mach_read_from_4() calls below may attempt to read
+	from free memory.  The memory is "owned" by the buddy
+	allocator (and it has been allocated from the buffer
+	pool), so there is nothing wrong about this.  The
+	mach_read_from_4() calls here will only trigger bogus
+	Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */
+	space	= mach_read_from_4((const byte *) src
+			+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+	page_no	= mach_read_from_4((const byte *) src
+			+ FIL_PAGE_OFFSET);
+	/* Suppress Valgrind warnings about conditional jump
+	on uninitialized value. */
+	UNIV_MEM_VALID(&space, sizeof space);
+	UNIV_MEM_VALID(&page_no, sizeof page_no);
+	bpage = buf_page_hash_get(space, page_no);
+
+	if (!bpage || bpage->zip.data != src) {
+		/* The block has probably been freshly
+		allocated by buf_LRU_get_free_block() but not
+		added to buf_pool->page_hash yet.  Obviously,
+		it cannot be relocated. */
 
 		if (!have_page_hash_mutex) {
-			mutex_exit(&zip_free_mutex);
-			mutex_enter(&LRU_list_mutex);
-			rw_lock_x_lock(&page_hash_latch);
-		}
-
-		/* The src block may be split into smaller blocks,
-		some of which may be free.  Thus, the
-		mach_read_from_4() calls below may attempt to read
-		from free memory.  The memory is "owned" by the buddy
-		allocator (and it has been allocated from the buffer
-		pool), so there is nothing wrong about this.  The
-		mach_read_from_4() calls here will only trigger bogus
-		Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */
-		space	= mach_read_from_4(
-			(const byte*) src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
-		page_no	= mach_read_from_4(
-			(const byte*) src + FIL_PAGE_OFFSET);
-		/* Suppress Valgrind warnings about conditional jump
-		on uninitialized value. */
-		UNIV_MEM_VALID(&space, sizeof space);
-		UNIV_MEM_VALID(&page_no, sizeof page_no);
-		bpage = buf_page_hash_get(space, page_no);
-
-		if (!bpage || bpage->zip.data != src) {
-			/* The block has probably been freshly
-			allocated by buf_LRU_get_free_block() but not
-			added to buf_pool->page_hash yet.  Obviously,
-			it cannot be relocated. */
-
-			if (!have_page_hash_mutex) {
-				mutex_enter(&zip_free_mutex);
-				mutex_exit(&LRU_list_mutex);
-				rw_lock_x_unlock(&page_hash_latch);
-			}
-			return(FALSE);
-		}
-
-		if (page_zip_get_size(&bpage->zip) != size) {
-			/* The block is of different size.  We would
-			have to relocate all blocks covered by src.
-			For the sake of simplicity, give up. */
-			ut_ad(page_zip_get_size(&bpage->zip) < size);
-
-			if (!have_page_hash_mutex) {
-				mutex_enter(&zip_free_mutex);
-				mutex_exit(&LRU_list_mutex);
-				rw_lock_x_unlock(&page_hash_latch);
-			}
-			return(FALSE);
+			mutex_enter(&zip_free_mutex);
+			mutex_exit(&LRU_list_mutex);
+			rw_lock_x_unlock(&page_hash_latch);
 		}
+		return(FALSE);
+	}
 
-		/* To keep latch order */
-		if (have_page_hash_mutex)
-			mutex_exit(&zip_free_mutex);
-
-		/* The block must have been allocated, but it may
-		contain uninitialized data. */
-		UNIV_MEM_ASSERT_W(src, size);
-
-		mutex = buf_page_get_mutex_enter(bpage);
-
-		mutex_enter(&zip_free_mutex);
-
-		if (mutex && buf_page_can_relocate(bpage)) {
-			/* Relocate the compressed page. */
-			ut_a(bpage->zip.data == src);
-			memcpy(dst, src, size);
-			bpage->zip.data = dst;
-			mutex_exit(mutex);
-success:
-			UNIV_MEM_INVALID(src, size);
-			{
-				buf_buddy_stat_t*	buddy_stat
-					= &buf_buddy_stat[i];
-				buddy_stat->relocated++;
-				buddy_stat->relocated_usec
-					+= ut_time_us(NULL) - usec;
-			}
-
-			if (!have_page_hash_mutex) {
-				mutex_exit(&LRU_list_mutex);
-				rw_lock_x_unlock(&page_hash_latch);
-			}
-			return(TRUE);
-		}
+	if (page_zip_get_size(&bpage->zip) != size) {
+		/* The block is of different size.  We would
+		have to relocate all blocks covered by src.
+		For the sake of simplicity, give up. */
+		ut_ad(page_zip_get_size(&bpage->zip) < size);
 
 		if (!have_page_hash_mutex) {
+			mutex_enter(&zip_free_mutex);
 			mutex_exit(&LRU_list_mutex);
 			rw_lock_x_unlock(&page_hash_latch);
 		}
+		return(FALSE);
+	}
 
-		if (mutex) {
-			mutex_exit(mutex);
-		}
-	} else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
-		/* This must be a buf_page_t object. */
-#if UNIV_WORD_SIZE == 4
-		/* On 32-bit systems, there is no padding in
-		buf_page_t.  On other systems, Valgrind could complain
-		about uninitialized pad bytes. */
-		UNIV_MEM_ASSERT_RW(src, size);
-#endif
-
+	/* To keep latch order */
+	if (have_page_hash_mutex)
 		mutex_exit(&zip_free_mutex);
 
-		if (!have_page_hash_mutex) {
-			mutex_enter(&LRU_list_mutex);
-			rw_lock_x_lock(&page_hash_latch);
-		}
+	/* The block must have been allocated, but it may
+	contain uninitialized data. */
+	UNIV_MEM_ASSERT_W(src, size);
 
-		if (buf_buddy_relocate_block(src, dst)) {
-			mutex_enter(&zip_free_mutex);
+	mutex = buf_page_get_mutex_enter(bpage);
 
-			if (!have_page_hash_mutex) {
-				mutex_exit(&LRU_list_mutex);
-				rw_lock_x_unlock(&page_hash_latch);
-			}
+	mutex_enter(&zip_free_mutex);
 
-			goto success;
+	if (mutex && buf_page_can_relocate(bpage)) {
+		/* Relocate the compressed page. */
+		ut_a(bpage->zip.data == src);
+		memcpy(dst, src, size);
+		bpage->zip.data = dst;
+		mutex_exit(mutex);
+		UNIV_MEM_INVALID(src, size);
+		{
+			buf_buddy_stat_t*	buddy_stat
+				= &buf_buddy_stat[i];
+			buddy_stat->relocated++;
+			buddy_stat->relocated_usec
+				+= ut_time_us(NULL) - usec;
 		}
 
-		mutex_enter(&zip_free_mutex);
-
 		if (!have_page_hash_mutex) {
 			mutex_exit(&LRU_list_mutex);
 			rw_lock_x_unlock(&page_hash_latch);
 		}
+		return(TRUE);
+	}
+
+	if (!have_page_hash_mutex) {
+		mutex_exit(&LRU_list_mutex);
+		rw_lock_x_unlock(&page_hash_latch);
+	}
+
+	if (mutex) {
+		mutex_exit(mutex);
 	}
 
 	return(FALSE);
@@ -629,12 +493,14 @@ buf_buddy_free_low(
 	ut_ad(mutex_own(&zip_free_mutex));
 	ut_ad(!mutex_own(&buf_pool_zip_mutex));
 	ut_ad(i <= BUF_BUDDY_SIZES);
+	ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
 	ut_ad(buf_buddy_stat[i].used > 0);
 
 	buf_buddy_stat[i].used--;
+
 recombine:
 	UNIV_MEM_ASSERT_AND_ALLOC(buf, BUF_BUDDY_LOW << i);
-	ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
+	((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE;
 
 	if (i == BUF_BUDDY_SIZES) {
 		mutex_exit(&zip_free_mutex);
@@ -647,32 +513,36 @@ recombine:
 	ut_ad(buf == ut_align_down(buf, BUF_BUDDY_LOW << i));
 	ut_ad(!buf_pool_contains_zip(buf));
 
-	/* Try to combine adjacent blocks. */
+	/* Do not recombine blocks if there are few free blocks.
+	We may waste up to 15360*max_len bytes to free blocks
+	(1024 + 2048 + 4096 + 8192 = 15360) */
+	if (UT_LIST_GET_LEN(buf_pool->zip_free[i]) < 16) {
+		goto func_exit;
+	}
 
+	/* Try to combine adjacent blocks. */
 	buddy = (buf_page_t*) buf_buddy_get(((byte*) buf), BUF_BUDDY_LOW << i);
 
 #ifndef UNIV_DEBUG_VALGRIND
-	/* Valgrind would complain about accessing free memory. */
+	/* When Valgrind instrumentation is not enabled, we can read
+	buddy->state to quickly determine that a block is not free.
+	When the block is not free, buddy->state belongs to a compressed
+	page frame that may be flagged uninitialized in our Valgrind
+	instrumentation.  */
 
 	if (buddy->state != BUF_BLOCK_ZIP_FREE) {
 
 		goto buddy_nonfree;
 	}
-
-	/* The field buddy->state can only be trusted for free blocks.
-	If buddy->state == BUF_BLOCK_ZIP_FREE, the block is free if
-	it is in the free list. */
 #endif /* !UNIV_DEBUG_VALGRIND */
 
 	for (bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); bpage; ) {
-		UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
 		ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
 
 		if (bpage == buddy) {
-buddy_free:
 			/* The buddy is free: recombine */
 			buf_buddy_remove_from_free(bpage, i);
-buddy_free2:
+buddy_is_free:
 			ut_ad(buf_page_get_state(buddy) == BUF_BLOCK_ZIP_FREE);
 			ut_ad(!buf_pool_contains_zip(buddy));
 			i++;
@@ -682,122 +552,43 @@ buddy_free2:
 		}
 
 		ut_a(bpage != buf);
-
-		{
-			buf_page_t*	next = UT_LIST_GET_NEXT(zip_list, bpage);
-			UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
-			bpage = next;
-		}
+		UNIV_MEM_ASSERT_W(bpage, BUF_BUDDY_LOW << i);
+		bpage = UT_LIST_GET_NEXT(zip_list, bpage);
 	}
 
 #ifndef UNIV_DEBUG_VALGRIND
 buddy_nonfree:
-	/* Valgrind would complain about accessing free memory. */
-	ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
-			      ut_ad(buf_page_get_state(ut_list_node_313)
-				    == BUF_BLOCK_ZIP_FREE)));
-#endif /* UNIV_DEBUG_VALGRIND */
+#endif /* !UNIV_DEBUG_VALGRIND */
+
+	ut_d(BUF_BUDDY_LIST_VALIDATE(i));
 
 	/* The buddy is not free. Is there a free block of this size? */
 	bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
 
 	if (bpage) {
+
 		/* Remove the block from the free list, because a successful
 		buf_buddy_relocate() will overwrite bpage->list. */
-
-		UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
 		buf_buddy_remove_from_free(bpage, i);
 
 		/* Try to relocate the buddy of buf to the free block. */
 		if (buf_buddy_relocate(buddy, bpage, i, have_page_hash_mutex)) {
 
-			ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
-			goto buddy_free2;
+			buddy->state = BUF_BLOCK_ZIP_FREE;
+			goto buddy_is_free;
 		}
 
 		buf_buddy_add_to_free(bpage, i);
-
-		/* Try to relocate the buddy of the free block to buf. */
-		buddy = (buf_page_t*) buf_buddy_get(((byte*) bpage),
-						    BUF_BUDDY_LOW << i);
-
-#ifndef UNIV_DEBUG_VALGRIND
-		/* Valgrind would complain about accessing free memory. */
-
-		/* The buddy must not be (completely) free, because we
-		always recombine adjacent free blocks.
-
-		(Parts of the buddy can be free in
-		buf_pool->zip_free[j] with j < i.) */
-		ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
-				      ut_ad(buf_page_get_state(
-						    ut_list_node_313)
-					    == BUF_BLOCK_ZIP_FREE
-					    && ut_list_node_313 != buddy)));
-#endif /* !UNIV_DEBUG_VALGRIND */
-
-		if (buf_buddy_relocate(buddy, buf, i, have_page_hash_mutex)) {
-
-			buf = bpage;
-			UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
-			ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
-			goto buddy_free;
-		}
 	}
 
+func_exit:
 	/* Free the block to the buddy list. */
 	bpage = buf;
-#ifdef UNIV_DEBUG
-	if (i < buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE)) {
-		/* This area has most likely been allocated for at
-		least one compressed-only block descriptor.  Check
-		that there are no live objects in the area.  This is
-		not a complete check: it may yield false positives as
-		well as false negatives.  Also, due to buddy blocks
-		being recombined, it is possible (although unlikely)
-		that this branch is never reached. */
-
-		char* c;
-
-# ifndef UNIV_DEBUG_VALGRIND
-		/* Valgrind would complain about accessing
-		uninitialized memory.  Besides, Valgrind performs a
-		more exhaustive check, at every memory access. */
-		const buf_page_t* b = buf;
-		const buf_page_t* const b_end = (buf_page_t*)
-			((char*) b + (BUF_BUDDY_LOW << i));
-
-		for (; b < b_end; b++) {
-			/* Avoid false positives (and cause false
-			negatives) by checking for b->space < 1000. */
-
-			if ((b->state == BUF_BLOCK_ZIP_PAGE
-			     || b->state == BUF_BLOCK_ZIP_DIRTY)
-			    && b->space > 0 && b->space < 1000) {
-				fprintf(stderr,
-					"buddy dirty %p %u (%u,%u) %p,%lu\n",
-					(void*) b,
-					b->state, b->space, b->offset,
-					buf, i);
-			}
-		}
-# endif /* !UNIV_DEBUG_VALGRIND */
-
-		/* Scramble the block.  This should make any pointers
-		invalid and trigger a segmentation violation.  Because
-		the scrambling can be reversed, it may be possible to
-		track down the object pointing to the freed data by
-		dereferencing the unscrambled bpage->LRU or
-		bpage->list pointers. */
-		for (c = (char*) buf + (BUF_BUDDY_LOW << i);
-		     c-- > (char*) buf; ) {
-			*c = ~*c ^ i;
-		}
-	} else {
-		/* Fill large blocks with a constant pattern. */
-		memset(bpage, i, BUF_BUDDY_LOW << i);
-	}
-#endif /* UNIV_DEBUG */
+
+	/* Fill large blocks with a constant pattern. */
+	ut_d(memset(bpage, i, BUF_BUDDY_LOW << i));
+	UNIV_MEM_INVALID(bpage, BUF_BUDDY_LOW << i);
+
 	bpage->state = BUF_BLOCK_ZIP_FREE;
 	buf_buddy_add_to_free(bpage, i);
 }
diff --git a/storage/xtradb/buf/buf0buf.c b/storage/xtradb/buf/buf0buf.c
index 40a907b4199..c76973a42ef 100644
--- a/storage/xtradb/buf/buf0buf.c
+++ b/storage/xtradb/buf/buf0buf.c
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2008, Google Inc.
 
 Portions of this file contain modifications contributed and copyrighted by
@@ -53,10 +53,6 @@ Created 11/5/1995 Heikki Tuuri
 #include "page0zip.h"
 #include "trx0trx.h"
 #include "srv0start.h"
-#include "que0que.h"
-#include "read0read.h"
-#include "row0row.h"
-#include "ha_prototypes.h"
 
 /* prototypes for new functions added to ha_innodb.cc */
 trx_t* innobase_get_trx();
@@ -314,30 +310,6 @@ read-ahead or flush occurs */
 UNIV_INTERN ibool		buf_debug_prints = FALSE;
 #endif /* UNIV_DEBUG */
 
-/* Buffer pool shared memory segment information */
-typedef	struct buf_shm_info_struct	buf_shm_info_t;
-
-struct buf_shm_info_struct {
-	char	head_str[8];
-	ulint	binary_id;
-	ibool	is_new;		/* during initializing */
-	ibool	clean;		/* clean shutdowned and free */
-	ibool	reusable;	/* reusable */
-	ulint	buf_pool_size;	/* backup value */
-	ulint	page_size;	/* backup value */
-	ulint	frame_offset;	/* offset of the first frame based on chunk->mem */
-	ulint	zip_hash_offset;
-	ulint	zip_hash_n;
-
-	ulint	checksum;
-
-	buf_pool_t	buf_pool_backup;
-	buf_chunk_t	chunk_backup;
-
-	ib_uint64_t	dummy;
-};
-
-#define BUF_SHM_INFO_HEAD "XTRA_SHM"
 #endif /* !UNIV_HOTBACKUP */
 
 /********************************************************************//**
@@ -767,6 +739,10 @@ buf_block_init(
 	block->page.in_flush_list = FALSE;
 	block->page.in_free_list = FALSE;
 #endif /* UNIV_DEBUG */
+	block->page.flush_list.prev = NULL;
+	block->page.flush_list.next = NULL;
+	block->page.zip_list.prev = NULL;
+	block->page.zip_list.next = NULL;
 	block->page.in_LRU_list = FALSE;
 	block->in_unzip_LRU_list = FALSE;
 #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
@@ -784,45 +760,6 @@ buf_block_init(
 #endif /* UNIV_SYNC_DEBUG */
 }
 
-static
-void
-buf_block_reuse(
-/*============*/
-	buf_block_t*	block,
-	ptrdiff_t	frame_offset)
-{
-	/* block_init */
-	block->frame += frame_offset;
-
-	UNIV_MEM_DESC(block->frame, UNIV_PAGE_SIZE, block);
-
-	block->index = NULL;
-
-#ifdef UNIV_DEBUG
-	/* recreate later */
-	block->page.in_page_hash = FALSE;
-	block->page.in_zip_hash = FALSE;
-#endif /* UNIV_DEBUG */
-
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-	block->n_pointers = 0;
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-
-	if (block->page.zip.data)
-		block->page.zip.data += frame_offset;
-
-	block->is_hashed = FALSE;
-
-	mutex_create(&block->mutex, SYNC_BUF_BLOCK);
-
-	rw_lock_create(&block->lock, SYNC_LEVEL_VARYING);
-	ut_ad(rw_lock_validate(&(block->lock)));
-
-#ifdef UNIV_SYNC_DEBUG
-	rw_lock_create(&block->debug_latch, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
-}
-
 /********************************************************************//**
 Allocates a chunk of buffer frames.
 @return	chunk, or NULL on failure */
@@ -835,190 +772,28 @@ buf_chunk_init(
 {
 	buf_block_t*	block;
 	byte*		frame;
-	ulint		zip_hash_n = 0;
-	ulint		zip_hash_mem_size = 0;
-	hash_table_t*	zip_hash_tmp = NULL;
 	ulint		i;
 	ulint		size_target;
-	buf_shm_info_t*	shm_info = NULL;
 
 	/* Round down to a multiple of page size,
 	although it already should be. */
 	mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
 	size_target = (mem_size / UNIV_PAGE_SIZE) - 1;
-
-	srv_buffer_pool_shm_is_reused = FALSE;
-
-	if (srv_buffer_pool_shm_key) {
-		/* zip_hash size */
-		zip_hash_n = (mem_size / UNIV_PAGE_SIZE) * 2;
-		zip_hash_mem_size = ut_2pow_round(hash_create_needed(zip_hash_n)
-						  + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
-	}
-
 	/* Reserve space for the block descriptors. */
 	mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
 				  + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
-	if (srv_buffer_pool_shm_key) {
-		 mem_size += ut_2pow_round(sizeof(buf_shm_info_t)
-					   + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
-		 mem_size += zip_hash_mem_size;
-	}
 
 	chunk->mem_size = mem_size;
-
-	if (srv_buffer_pool_shm_key) {
-		ulint	binary_id;
-		ibool	is_new;
-
-		ut_a(buf_pool->n_chunks == 1);
-
-		fprintf(stderr,
-		"InnoDB: Warning: The innodb_buffer_pool_shm_key option has been specified.\n"
-		"InnoDB: Do not change the following between restarts of the server while this option is being used:\n"
-		"InnoDB:   * the mysqld executable between restarts of the server.\n"
-		"InnoDB:   * the value of innodb_buffer_pool_size.\n"
-		"InnoDB:   * the value of innodb_page_size.\n"
-		"InnoDB:   * datafiles created by InnoDB during this session.\n"
-		"InnoDB: Otherwise, data corruption in datafiles may result.\n");
-
-		/* FIXME: This is vague id still */
-		binary_id = (ulint) ((byte*)mtr_commit - (byte*)btr_root_get)
-			  + (ulint) ((byte*)os_get_os_version - (byte*)buf_calc_page_new_checksum)
-			  + (ulint) ((byte*)page_dir_find_owner_slot - (byte*)dfield_data_is_binary_equal)
-			  + (ulint) ((byte*)que_graph_publish - (byte*)dict_casedn_str)
-			  + (ulint) ((byte*)read_view_oldest_copy_or_open_new - (byte*)fil_space_get_version)
-			  + (ulint) ((byte*)rec_get_n_extern_new - (byte*)fsp_get_size_low)
-			  + (ulint) ((byte*)row_get_trx_id_offset - (byte*)ha_create_func)
-			  + (ulint) ((byte*)srv_set_io_thread_op_info - (byte*)thd_is_replication_slave_thread)
-			  + (ulint) ((byte*)mutex_create_func - (byte*)ibuf_inside)
-			  + (ulint) ((byte*)trx_set_detailed_error - (byte*)lock_check_trx_id_sanity)
-			  + (ulint) ((byte*)ut_time - (byte*)mem_heap_strdup);
-
-		chunk->mem = os_shm_alloc(&chunk->mem_size, srv_buffer_pool_shm_key, &is_new);
-
-		if (UNIV_UNLIKELY(chunk->mem == NULL)) {
-			return(NULL);
-		}
-init_again:
-#ifdef UNIV_SET_MEM_TO_ZERO
-		if (is_new) {
-			memset(chunk->mem, '\0', chunk->mem_size);
-		}
-#endif
-		/* for ut_fold_binary_32(), these values should be 32-bit aligned */
-		ut_a(sizeof(buf_shm_info_t) % 4 == 0);
-		ut_a((ulint)chunk->mem % 4 == 0);
-		ut_a(chunk->mem_size % 4 == 0);
-
-		shm_info = chunk->mem;
-
-		zip_hash_tmp = (hash_table_t*)((byte*)chunk->mem + chunk->mem_size - zip_hash_mem_size);
-
-		if (is_new) {
-			strncpy(shm_info->head_str, BUF_SHM_INFO_HEAD, 8);
-			shm_info->binary_id = binary_id;
-			shm_info->is_new = TRUE;	/* changed to FALSE when the initialization is finished */
-			shm_info->clean = FALSE;	/* changed to TRUE when free the segment. */
-			shm_info->reusable = FALSE;	/* changed to TRUE when validation is finished. */
-			shm_info->buf_pool_size = srv_buf_pool_size;
-			shm_info->page_size = srv_page_size;
-			shm_info->zip_hash_offset = chunk->mem_size - zip_hash_mem_size;
-			shm_info->zip_hash_n = zip_hash_n;
-		} else {
-			ulint	checksum;
-
-			if (strncmp(shm_info->head_str, BUF_SHM_INFO_HEAD, 8)) {
-				fprintf(stderr,
-				"InnoDB: Error: The shared memory segment seems not to be for buffer pool.\n");
-				return(NULL);
-			}
-			if (shm_info->binary_id != binary_id) {
-				fprintf(stderr,
-				"InnoDB: Error: The shared memory segment seems not to be for this binary.\n");
-				return(NULL);
-			}
-			if (shm_info->is_new) {
-				fprintf(stderr,
-				"InnoDB: Error: The shared memory was not initialized yet.\n");
-				return(NULL);
-			}
-			if (shm_info->buf_pool_size != srv_buf_pool_size) {
-				fprintf(stderr,
-				"InnoDB: Error: srv_buf_pool_size is different (shm=%lu current=%lu).\n",
-				shm_info->buf_pool_size, srv_buf_pool_size);
-				return(NULL);
-			}
-			if (shm_info->page_size != srv_page_size) {
-				fprintf(stderr,
-				"InnoDB: Error: srv_page_size is different (shm=%lu current=%lu).\n",
-				shm_info->page_size, srv_page_size);
-				return(NULL);
-			}
-			if (!shm_info->reusable) {
-				fprintf(stderr,
-				"InnoDB: Warning: The shared memory has unrecoverable contents.\n"
-				"InnoDB: The shared memory segment is initialized.\n");
-				is_new = TRUE;
-				goto init_again;
-			}
-			if (!shm_info->clean) {
-				fprintf(stderr,
-				"InnoDB: Warning: The shared memory was not shut down cleanly.\n"
-				"InnoDB: The shared memory segment is initialized.\n");
-				is_new = TRUE;
-				goto init_again;
-			}
-
-			ut_a(shm_info->zip_hash_offset == chunk->mem_size - zip_hash_mem_size);
-			ut_a(shm_info->zip_hash_n == zip_hash_n);
-
-			/* check checksum */
-			if (srv_buffer_pool_shm_checksum) {
-				checksum = ut_fold_binary_32((byte*)chunk->mem + sizeof(buf_shm_info_t),
-							     chunk->mem_size - sizeof(buf_shm_info_t));
-			} else {
-				checksum = BUF_NO_CHECKSUM_MAGIC;
-			}
-
-			if (shm_info->checksum != BUF_NO_CHECKSUM_MAGIC
-			    && shm_info->checksum != checksum) {
-				fprintf(stderr,
-				"InnoDB: Error: checksum of the shared memory is not match. "
-				"(stored=%lu calculated=%lu)\n",
-				shm_info->checksum, checksum);
-				return(NULL);
-			}
-
-			/* flag to use the segment. */
-			shm_info->clean = FALSE;	/* changed to TRUE when free the segment. */
-		}
-
-		/* init zip_hash contents */
-		if (is_new) {
-			hash_create_init(zip_hash_tmp, zip_hash_n);
-		} else {
-			/* adjust offset is done later */
-			hash_create_reuse(zip_hash_tmp);
-
-			srv_buffer_pool_shm_is_reused = TRUE;
-		}
-	} else {
 	chunk->mem = os_mem_alloc_large(&chunk->mem_size);
 
 	if (UNIV_UNLIKELY(chunk->mem == NULL)) {
 
 		return(NULL);
 	}
-	}
 
 	/* Allocate the block descriptors from
 	the start of the memory block. */
-	if (srv_buffer_pool_shm_key) {
-		chunk->blocks = (buf_block_t*)((byte*)chunk->mem + sizeof(buf_shm_info_t));
-	} else {
 	chunk->blocks = chunk->mem;
-	}
 
 	/* Align a pointer to the first frame.  Note that when
 	os_large_page_size is smaller than UNIV_PAGE_SIZE,
@@ -1026,13 +801,8 @@ init_again:
 	it is bigger, we may allocate more blocks than requested. */
 
 	frame = ut_align(chunk->mem, UNIV_PAGE_SIZE);
-	if (srv_buffer_pool_shm_key) {
-		/* reserve zip_hash space and always -1 for reproductibity */
-		chunk->size = (chunk->mem_size - zip_hash_mem_size) / UNIV_PAGE_SIZE - 1;
-	} else {
 	chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
 		- (frame != chunk->mem);
-	}
 
 	/* Subtract the space needed for block descriptors. */
 	{
@@ -1050,99 +820,6 @@ init_again:
 		chunk->size = size_target;
 	}
 
-	if (shm_info && !(shm_info->is_new)) {
-		/* convert the shared memory segment for reuse */
-		ptrdiff_t	phys_offset;
-		ptrdiff_t	logi_offset;
-		ptrdiff_t	blocks_offset;
-		void*		previous_frame_address;
-
-		if (chunk->size < shm_info->chunk_backup.size) {
-			fprintf(stderr,
-			"InnoDB: Error: The buffer pool became smaller because of allocated address.\n"
-			"InnoDB: Retrying may avoid this situation.\n");
-			shm_info->clean = TRUE; /* release the flag for retrying */
-			return(NULL);
-		}
-
-		chunk->size = shm_info->chunk_backup.size;
-		phys_offset = frame - ((byte*)chunk->mem + shm_info->frame_offset);
-		logi_offset = frame - chunk->blocks[0].frame;
-		previous_frame_address = chunk->blocks[0].frame;
-		blocks_offset = (byte*)chunk->blocks - (byte*)shm_info->chunk_backup.blocks;
-
-		if (phys_offset || logi_offset || blocks_offset) {
-			fprintf(stderr,
-			"InnoDB: Buffer pool in the shared memory segment should be converted.\n"
-			"InnoDB: Previous frames in address      : %p\n"
-			"InnoDB: Previous frames were located    : %p\n"
-			"InnoDB: Current frames should be located: %p\n"
-			"InnoDB: Pysical offset                  : %ld (%#lx)\n"
-			"InnoDB: Logical offset (frames)         : %ld (%#lx)\n"
-			"InnoDB: Logical offset (blocks)         : %ld (%#lx)\n",
-				(byte*)chunk->mem + shm_info->frame_offset,
-				chunk->blocks[0].frame, frame,
-				(long) phys_offset, (long) phys_offset,
-                                (long) logi_offset, (long) logi_offset,
-				(long) blocks_offset, (long) blocks_offset);
-		} else {
-			fprintf(stderr,
-			"InnoDB: Buffer pool in the shared memory segment can be used as it is.\n");
-		}
-
-		if (phys_offset) {
-			fprintf(stderr,
-			"InnoDB: Aligning physical offset...");
-
-			memmove(frame, (byte*)chunk->mem + shm_info->frame_offset,
-				chunk->size * UNIV_PAGE_SIZE);
-
-			fprintf(stderr,
-			" Done.\n");
-		}
-
-		/* buf_block_t */
-		block = chunk->blocks;
-		for (i = chunk->size; i--; ) {
-			buf_block_reuse(block, logi_offset);
-			block++;
-		}
-
-		if (logi_offset || blocks_offset) {
-			fprintf(stderr,
-			"InnoDB: Aligning logical offset...");
-
-
-			/* buf_pool_t buf_pool_backup */
-			UT_LIST_OFFSET(flush_list, buf_page_t, shm_info->buf_pool_backup.flush_list,
-					previous_frame_address, logi_offset, blocks_offset);
-			UT_LIST_OFFSET(free, buf_page_t, shm_info->buf_pool_backup.free,
-					previous_frame_address, logi_offset, blocks_offset);
-			UT_LIST_OFFSET(LRU, buf_page_t, shm_info->buf_pool_backup.LRU,
-					previous_frame_address, logi_offset, blocks_offset);
-			if (shm_info->buf_pool_backup.LRU_old)
-				shm_info->buf_pool_backup.LRU_old =
-					(buf_page_t*)((byte*)(shm_info->buf_pool_backup.LRU_old)
-						+ (((void*)shm_info->buf_pool_backup.LRU_old > previous_frame_address)
-						  ? logi_offset : blocks_offset));
-
-			UT_LIST_OFFSET(unzip_LRU, buf_block_t, shm_info->buf_pool_backup.unzip_LRU,
-					previous_frame_address, logi_offset, blocks_offset);
-
-			UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_clean,
-					previous_frame_address, logi_offset, blocks_offset);
-			for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) {
-				UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_free[i],
-					previous_frame_address, logi_offset, blocks_offset);
-			}
-
-			HASH_OFFSET(zip_hash_tmp, buf_page_t, hash,
-					previous_frame_address, logi_offset, blocks_offset);
-
-			fprintf(stderr,
-			" Done.\n");
-		}
-	} else {
 	/* Init block structs and assign frames for them. Then we
 	assign the frames to the first blocks (we already mapped the
 	memory above). */
@@ -1166,11 +843,6 @@ init_again:
 		block++;
 		frame += UNIV_PAGE_SIZE;
 	}
-	}
-
-	if (shm_info) {
-		shm_info->frame_offset = chunk->blocks[0].frame - (byte*)chunk->mem;
-	}
 
 	return(chunk);
 }
@@ -1287,76 +959,6 @@ buf_chunk_not_freed(
 	return(NULL);
 }
 
-/*********************************************************************//**
-Checks that all blocks in the buffer chunk are in BUF_BLOCK_NOT_USED state.
-@return	TRUE if all freed */
-static
-ibool
-buf_chunk_all_free(
-/*===============*/
-	const buf_chunk_t*	chunk)	/*!< in: chunk being checked */
-{
-	const buf_block_t*	block;
-	ulint			i;
-
-	ut_ad(buf_pool);
-	ut_ad(buf_pool_mutex_own()); /* but we need all mutex here */
-
-	block = chunk->blocks;
-
-	for (i = chunk->size; i--; block++) {
-
-		if (buf_block_get_state(block) != BUF_BLOCK_NOT_USED) {
-
-			return(FALSE);
-		}
-	}
-
-	return(TRUE);
-}
-
-/********************************************************************//**
-Frees a chunk of buffer frames. */
-static
-void
-buf_chunk_free(
-/*===========*/
-	buf_chunk_t*	chunk)		/*!< out: chunk of buffers */
-{
-	buf_block_t*		block;
-	const buf_block_t*	block_end;
-
-	ut_ad(buf_pool_mutex_own()); /* but we need all mutex here */
-
-	block_end = chunk->blocks + chunk->size;
-
-	for (block = chunk->blocks; block < block_end; block++) {
-		ut_a(buf_block_get_state(block) == BUF_BLOCK_NOT_USED);
-		ut_a(!block->page.zip.data);
-
-		ut_ad(!block->page.in_LRU_list);
-		ut_ad(!block->in_unzip_LRU_list);
-		ut_ad(!block->page.in_flush_list);
-		/* Remove the block from the free list. */
-		mutex_enter(&free_list_mutex);
-		ut_ad(block->page.in_free_list);
-		UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
-		mutex_exit(&free_list_mutex);
-
-		/* Free the latches. */
-		mutex_free(&block->mutex);
-		rw_lock_free(&block->lock);
-#ifdef UNIV_SYNC_DEBUG
-		rw_lock_free(&block->debug_latch);
-#endif /* UNIV_SYNC_DEBUG */
-		UNIV_MEM_UNDESC(block);
-	}
-
-	ut_a(!srv_buffer_pool_shm_key);
-
-	os_mem_free_large(chunk->mem, chunk->mem_size);
-}
-
 /********************************************************************//**
 Creates the buffer pool.
 @return	own: buf_pool object, NULL if not enough memory or error */
@@ -1403,10 +1005,7 @@ buf_pool_init(void)
 	srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
 
 	buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
-	/* zip_hash is allocated to shm when srv_buffer_pool_shm_key is enabled */
-	if (!srv_buffer_pool_shm_key) {
 	buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
-	}
 
 	buf_pool->last_printout_time = time(NULL);
 
@@ -1421,86 +1020,6 @@ buf_pool_init(void)
 	--------------------------- */
 	/* All fields are initialized by mem_zalloc(). */
 
-	if (srv_buffer_pool_shm_key) {
-		buf_shm_info_t*	shm_info;
-
-		ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t));
-		shm_info = chunk->mem;
-
-		buf_pool->zip_hash = (hash_table_t*)((byte*)chunk->mem + shm_info->zip_hash_offset);
-
-		if(shm_info->is_new) {
-			shm_info->is_new = FALSE; /* initialization was finished */
-		} else {
-			buf_block_t*	block = chunk->blocks;
-			buf_page_t*	b;
-
-			/* shm_info->buf_pool_backup should be converted */
-			/* at buf_chunk_init(). So copy simply. */
-			buf_pool->flush_list 		= shm_info->buf_pool_backup.flush_list;
-			buf_pool->freed_page_clock 	= shm_info->buf_pool_backup.freed_page_clock;
-			buf_pool->free			= shm_info->buf_pool_backup.free;
-			buf_pool->LRU			= shm_info->buf_pool_backup.LRU;
-			buf_pool->LRU_old		= shm_info->buf_pool_backup.LRU_old;
-			buf_pool->LRU_old_len		= shm_info->buf_pool_backup.LRU_old_len;
-			buf_pool->unzip_LRU		= shm_info->buf_pool_backup.unzip_LRU;
-			buf_pool->zip_clean		= shm_info->buf_pool_backup.zip_clean;
-			for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) {
-				buf_pool->zip_free[i]	= shm_info->buf_pool_backup.zip_free[i];
-			}
-
-			for (i = 0; i < chunk->size; i++, block++) {
-				if (buf_block_get_state(block)
-				    == BUF_BLOCK_FILE_PAGE) {
-					ut_d(block->page.in_page_hash = TRUE);
-					HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
-						    buf_page_address_fold(
-							    block->page.space,
-							    block->page.offset),
-						    &block->page);
-				}
-			}
-
-			for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
-			     b = UT_LIST_GET_NEXT(zip_list, b)) {
-				ut_ad(!b->in_flush_list);
-				ut_ad(b->in_LRU_list);
-
-				ut_d(b->in_page_hash = TRUE);
-				HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
-					    buf_page_address_fold(b->space, b->offset), b);
-			}
-
-			for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
-			     b = UT_LIST_GET_NEXT(flush_list, b)) {
-				ut_ad(b->in_flush_list);
-				ut_ad(b->in_LRU_list);
-
-				switch (buf_page_get_state(b)) {
-				case BUF_BLOCK_ZIP_DIRTY:
-					ut_d(b->in_page_hash = TRUE);
-					HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
-						    buf_page_address_fold(b->space,
-							    		  b->offset), b);
-					break;
-				case BUF_BLOCK_FILE_PAGE:
-					/* uncompressed page */
-					break;
-				case BUF_BLOCK_ZIP_FREE:
-				case BUF_BLOCK_ZIP_PAGE:
-				case BUF_BLOCK_NOT_USED:
-				case BUF_BLOCK_READY_FOR_USE:
-				case BUF_BLOCK_MEMORY:
-				case BUF_BLOCK_REMOVE_HASH:
-					ut_error;
-					break;
-				}
-			}
-
-
-		}
-	}
-
 	mutex_exit(&LRU_list_mutex);
 	rw_lock_x_unlock(&page_hash_latch);
 	buf_pool_mutex_exit();
@@ -1525,49 +1044,16 @@ buf_pool_free(void)
 	buf_chunk_t*	chunk;
 	buf_chunk_t*	chunks;
 
-	if (srv_buffer_pool_shm_key) {
-		buf_shm_info_t*	shm_info;
-
-		ut_a(buf_pool->n_chunks == 1);
-
-		chunk = buf_pool->chunks;
-		shm_info = chunk->mem;
-		ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t));
-
-		/* validation the shared memory segment doesn't have unrecoverable contents. */
-		/* Currently, validation became not needed */
-		shm_info->reusable = TRUE;
-
-		memcpy(&(shm_info->buf_pool_backup), buf_pool, sizeof(buf_pool_t));
-		memcpy(&(shm_info->chunk_backup), chunk, sizeof(buf_chunk_t));
-
-		if (srv_fast_shutdown < 2) {
-			if (srv_buffer_pool_shm_checksum) {
-				shm_info->checksum = ut_fold_binary_32((byte*)chunk->mem + sizeof(buf_shm_info_t),
-								       chunk->mem_size - sizeof(buf_shm_info_t));
-			} else {
-				shm_info->checksum = BUF_NO_CHECKSUM_MAGIC;
-			}
-			shm_info->clean = TRUE;
-		}
-
-		os_shm_free(chunk->mem, chunk->mem_size);
-	} else {
 	chunks = buf_pool->chunks;
 	chunk = chunks + buf_pool->n_chunks;
 
 	while (--chunk >= chunks) {
-		/* Bypass the checks of buf_chunk_free(), since they
-		would fail at shutdown. */
 		os_mem_free_large(chunk->mem, chunk->mem_size);
 	}
-	}
 
 	mem_free(buf_pool->chunks);
 	hash_table_free(buf_pool->page_hash);
-	if (!srv_buffer_pool_shm_key) {
 	hash_table_free(buf_pool->zip_hash);
-	}
 	mem_free(buf_pool);
 	buf_pool = NULL;
 }
@@ -1741,335 +1227,6 @@ buf_relocate(
 }
 
 /********************************************************************//**
-Shrinks the buffer pool. */
-static
-void
-buf_pool_shrink(
-/*============*/
-	ulint	chunk_size)	/*!< in: number of pages to remove */
-{
-	buf_chunk_t*	chunks;
-	buf_chunk_t*	chunk;
-	ulint		max_size;
-	ulint		max_free_size;
-	buf_chunk_t*	max_chunk;
-	buf_chunk_t*	max_free_chunk;
-
-	ut_ad(!buf_pool_mutex_own());
-
-try_again:
-	btr_search_disable(); /* Empty the adaptive hash index again */
-	//buf_pool_mutex_enter();
-	mutex_enter(&LRU_list_mutex);
-
-	if (srv_buffer_pool_shm_key) {
-		/* Cannot support shrink */
-		goto func_done;
-	}
-
-shrink_again:
-	if (buf_pool->n_chunks <= 1) {
-
-		/* Cannot shrink if there is only one chunk */
-		goto func_done;
-	}
-
-	/* Search for the largest free chunk
-	not larger than the size difference */
-	chunks = buf_pool->chunks;
-	chunk = chunks + buf_pool->n_chunks;
-	max_size = max_free_size = 0;
-	max_chunk = max_free_chunk = NULL;
-
-	while (--chunk >= chunks) {
-		if (chunk->size <= chunk_size
-		    && chunk->size > max_free_size) {
-			if (chunk->size > max_size) {
-				max_size = chunk->size;
-				max_chunk = chunk;
-			}
-
-			if (buf_chunk_all_free(chunk)) {
-				max_free_size = chunk->size;
-				max_free_chunk = chunk;
-			}
-		}
-	}
-
-	if (!max_free_size) {
-
-		ulint		dirty	= 0;
-		ulint		nonfree	= 0;
-		buf_block_t*	block;
-		buf_block_t*	bend;
-
-		/* Cannot shrink: try again later
-		(do not assign srv_buf_pool_old_size) */
-		if (!max_chunk) {
-
-			goto func_exit;
-		}
-
-		block = max_chunk->blocks;
-		bend = block + max_chunk->size;
-
-		/* Move the blocks of chunk to the end of the
-		LRU list and try to flush them. */
-		for (; block < bend; block++) {
-			switch (buf_block_get_state(block)) {
-			case BUF_BLOCK_NOT_USED:
-				continue;
-			case BUF_BLOCK_FILE_PAGE:
-				break;
-			default:
-				nonfree++;
-				continue;
-			}
-
-			mutex_enter(&block->mutex);
-			/* The following calls will temporarily
-			release block->mutex and buf_pool_mutex.
-			Therefore, we have to always retry,
-			even if !dirty && !nonfree. */
-
-			if (!buf_flush_ready_for_replace(&block->page)) {
-
-				buf_LRU_make_block_old(&block->page);
-				dirty++;
-			} else if (buf_LRU_free_block(&block->page, TRUE, FALSE)
-				   != BUF_LRU_FREED) {
-				nonfree++;
-			}
-
-			mutex_exit(&block->mutex);
-		}
-
-		//buf_pool_mutex_exit();
-		mutex_exit(&LRU_list_mutex);
-
-		/* Request for a flush of the chunk if it helps.
-		Do not flush if there are non-free blocks, since
-		flushing will not make the chunk freeable. */
-		if (nonfree) {
-			/* Avoid busy-waiting. */
-			os_thread_sleep(100000);
-		} else if (dirty
-			   && buf_flush_batch(BUF_FLUSH_LRU, dirty, 0)
-			   == ULINT_UNDEFINED) {
-
-			buf_flush_wait_batch_end(BUF_FLUSH_LRU);
-		}
-
-		goto try_again;
-	}
-
-	max_size = max_free_size;
-	max_chunk = max_free_chunk;
-
-	srv_buf_pool_old_size = srv_buf_pool_size;
-
-	/* Rewrite buf_pool->chunks.  Copy everything but max_chunk. */
-	chunks = mem_alloc((buf_pool->n_chunks - 1) * sizeof *chunks);
-	memcpy(chunks, buf_pool->chunks,
-	       (max_chunk - buf_pool->chunks) * sizeof *chunks);
-	memcpy(chunks + (max_chunk - buf_pool->chunks),
-	       max_chunk + 1,
-	       buf_pool->chunks + buf_pool->n_chunks
-	       - (max_chunk + 1));
-	ut_a(buf_pool->curr_size > max_chunk->size);
-	buf_pool->curr_size -= max_chunk->size;
-	srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
-	chunk_size -= max_chunk->size;
-	buf_chunk_free(max_chunk);
-	mem_free(buf_pool->chunks);
-	buf_pool->chunks = chunks;
-	buf_pool->n_chunks--;
-
-	/* Allow a slack of one megabyte. */
-	if (chunk_size > 1048576 / UNIV_PAGE_SIZE) {
-
-		goto shrink_again;
-	}
-
-func_done:
-	srv_buf_pool_old_size = srv_buf_pool_size;
-func_exit:
-	//buf_pool_mutex_exit();
-	mutex_exit(&LRU_list_mutex);
-	btr_search_enable();
-}
-
-/********************************************************************//**
-Rebuild buf_pool->page_hash. */
-static
-void
-buf_pool_page_hash_rebuild(void)
-/*============================*/
-{
-	ulint		i;
-	ulint		n_chunks;
-	buf_chunk_t*	chunk;
-	hash_table_t*	page_hash;
-	hash_table_t*	zip_hash;
-	buf_page_t*	b;
-
-	//buf_pool_mutex_enter();
-	mutex_enter(&LRU_list_mutex);
-	rw_lock_x_lock(&page_hash_latch);
-	mutex_enter(&flush_list_mutex);
-	
-
-	/* Free, create, and populate the hash table. */
-	hash_table_free(buf_pool->page_hash);
-	buf_pool->page_hash = page_hash = hash_create(2 * buf_pool->curr_size);
-	zip_hash = hash_create(2 * buf_pool->curr_size);
-
-	HASH_MIGRATE(buf_pool->zip_hash, zip_hash, buf_page_t, hash,
-		     BUF_POOL_ZIP_FOLD_BPAGE);
-
-	hash_table_free(buf_pool->zip_hash);
-	buf_pool->zip_hash = zip_hash;
-
-	/* Insert the uncompressed file pages to buf_pool->page_hash. */
-
-	chunk = buf_pool->chunks;
-	n_chunks = buf_pool->n_chunks;
-
-	for (i = 0; i < n_chunks; i++, chunk++) {
-		ulint		j;
-		buf_block_t*	block = chunk->blocks;
-
-		for (j = 0; j < chunk->size; j++, block++) {
-			if (buf_block_get_state(block)
-			    == BUF_BLOCK_FILE_PAGE) {
-				ut_ad(!block->page.in_zip_hash);
-				ut_ad(block->page.in_page_hash);
-
-				HASH_INSERT(buf_page_t, hash, page_hash,
-					    buf_page_address_fold(
-						    block->page.space,
-						    block->page.offset),
-					    &block->page);
-			}
-		}
-	}
-
-	/* Insert the compressed-only pages to buf_pool->page_hash.
-	All such blocks are either in buf_pool->zip_clean or
-	in buf_pool->flush_list. */
-
-	for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
-	     b = UT_LIST_GET_NEXT(zip_list, b)) {
-		ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
-		ut_ad(!b->in_flush_list);
-		ut_ad(b->in_LRU_list);
-		ut_ad(b->in_page_hash);
-		ut_ad(!b->in_zip_hash);
-
-		HASH_INSERT(buf_page_t, hash, page_hash,
-			    buf_page_address_fold(b->space, b->offset), b);
-	}
-
-	for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
-	     b = UT_LIST_GET_NEXT(flush_list, b)) {
-		ut_ad(b->in_flush_list);
-		ut_ad(b->in_LRU_list);
-		ut_ad(b->in_page_hash);
-		ut_ad(!b->in_zip_hash);
-
-		switch (buf_page_get_state(b)) {
-		case BUF_BLOCK_ZIP_DIRTY:
-			HASH_INSERT(buf_page_t, hash, page_hash,
-				    buf_page_address_fold(b->space,
-							  b->offset), b);
-			break;
-		case BUF_BLOCK_FILE_PAGE:
-			/* uncompressed page */
-			break;
-		case BUF_BLOCK_ZIP_FREE:
-		case BUF_BLOCK_ZIP_PAGE:
-		case BUF_BLOCK_NOT_USED:
-		case BUF_BLOCK_READY_FOR_USE:
-		case BUF_BLOCK_MEMORY:
-		case BUF_BLOCK_REMOVE_HASH:
-			ut_error;
-			break;
-		}
-	}
-
-	//buf_pool_mutex_exit();
-	mutex_exit(&LRU_list_mutex);
-	rw_lock_x_unlock(&page_hash_latch);
-	mutex_exit(&flush_list_mutex);
-}
-
-/********************************************************************//**
-Resizes the buffer pool. */
-UNIV_INTERN
-void
-buf_pool_resize(void)
-/*=================*/
-{
-	if (srv_buffer_pool_shm_key) {
-		/* Cannot support resize */
-		return;
-	}
-
-	//buf_pool_mutex_enter();
-	mutex_enter(&LRU_list_mutex);
-
-	if (srv_buf_pool_old_size == srv_buf_pool_size) {
-
-		//buf_pool_mutex_exit();
-		mutex_exit(&LRU_list_mutex);
-		return;
-	}
-
-	if (srv_buf_pool_curr_size + 1048576 > srv_buf_pool_size) {
-
-		//buf_pool_mutex_exit();
-		mutex_exit(&LRU_list_mutex);
-
-		/* Disable adaptive hash indexes and empty the index
-		in order to free up memory in the buffer pool chunks. */
-		buf_pool_shrink((srv_buf_pool_curr_size - srv_buf_pool_size)
-				/ UNIV_PAGE_SIZE);
-	} else if (srv_buf_pool_curr_size + 1048576 < srv_buf_pool_size) {
-
-		/* Enlarge the buffer pool by at least one megabyte */
-
-		ulint		mem_size
-			= srv_buf_pool_size - srv_buf_pool_curr_size;
-		buf_chunk_t*	chunks;
-		buf_chunk_t*	chunk;
-
-		chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);
-
-		memcpy(chunks, buf_pool->chunks, buf_pool->n_chunks
-		       * sizeof *chunks);
-
-		chunk = &chunks[buf_pool->n_chunks];
-
-		if (!buf_chunk_init(chunk, mem_size)) {
-			mem_free(chunks);
-		} else {
-			buf_pool->curr_size += chunk->size;
-			srv_buf_pool_curr_size = buf_pool->curr_size
-				* UNIV_PAGE_SIZE;
-			mem_free(buf_pool->chunks);
-			buf_pool->chunks = chunks;
-			buf_pool->n_chunks++;
-		}
-
-		srv_buf_pool_old_size = srv_buf_pool_size;
-		//buf_pool_mutex_exit();
-		mutex_exit(&LRU_list_mutex);
-	}
-
-	buf_pool_page_hash_rebuild();
-}
-
-/********************************************************************//**
 Moves a page to the start of the buffer pool LRU list. This high-level
 function can be used to prevent an important page from slipping out of
 the buffer pool. */
@@ -2303,6 +1460,27 @@ lookup:
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 	}
 
+	if (UNIV_UNLIKELY(bpage->space_was_being_deleted)) {
+		/* This page is obsolete; discard it and retry. */
+		rw_lock_s_unlock(&page_hash_latch);
+
+		mutex_enter(&LRU_list_mutex);
+		block_mutex = buf_page_get_mutex_enter(bpage);
+
+		if (UNIV_UNLIKELY(!block_mutex)) {
+			mutex_exit(&LRU_list_mutex);
+			goto lookup;
+		}
+
+		buf_LRU_free_block(bpage, TRUE, TRUE);
+
+		mutex_exit(&LRU_list_mutex);
+		mutex_exit(block_mutex);
+		block_mutex = NULL;
+
+		goto lookup;
+	}
+
 	if (UNIV_UNLIKELY(!bpage->zip.data)) {
 		/* There is no compressed page. */
 err_exit:
@@ -2311,13 +1489,12 @@ err_exit:
 		return(NULL);
 	}
 
-	if (srv_pass_corrupt_table) {
+	if (srv_pass_corrupt_table <= 1) {
 		if (bpage->is_corrupt) {
 			rw_lock_s_unlock(&page_hash_latch);
 			return(NULL);
 		}
 	}
-	ut_a(!(bpage->is_corrupt));
 
 	block_mutex = buf_page_get_mutex_enter(bpage);
 
@@ -2340,13 +1517,32 @@ err_exit:
 	case BUF_BLOCK_FILE_PAGE:
 		ut_a(block_mutex == &((buf_block_t*) bpage)->mutex);
 
-		/* Discard the uncompressed page frame if possible. */
-		if (buf_LRU_free_block(bpage, FALSE, FALSE) == BUF_LRU_FREED) {
+		/* Release the block mutex to obey the latching order. */
+		mutex_exit(block_mutex);
+
+		/* get LRU_list_mutex for buf_LRU_free_block() */
+		mutex_enter(&LRU_list_mutex);
+		mutex_enter(block_mutex);
 
+		if (UNIV_UNLIKELY(bpage->space != space
+				  || bpage->offset != offset
+				  || !bpage->in_LRU_list
+				  || !bpage->zip.data)) {
+			/* The page changed while the mutex was released; retry. */
+			mutex_exit(&LRU_list_mutex);
+			mutex_exit(block_mutex);
+			goto lookup;
+		}
+
+		/* Discard the uncompressed page frame if possible. */
+		if (buf_LRU_free_block(bpage, FALSE, TRUE)) {
+			mutex_exit(&LRU_list_mutex);
 			mutex_exit(block_mutex);
 			goto lookup;
 		}
 
+		mutex_exit(&LRU_list_mutex);
+
 		buf_block_buf_fix_inc((buf_block_t*) bpage,
 				      __FILE__, __LINE__);
 		goto got_block;
@@ -2516,16 +1712,19 @@ buf_block_align(
 	/* TODO: protect buf_pool->chunks with a mutex (it will
 	currently remain constant after buf_pool_init()) */
 	for (chunk = buf_pool->chunks, i = buf_pool->n_chunks; i--; chunk++) {
-		lint	offs = ptr - chunk->blocks->frame;
+		ulint	offs;
 
-		if (UNIV_UNLIKELY(offs < 0)) {
+		if (UNIV_UNLIKELY(ptr < chunk->blocks->frame)) {
 
 			continue;
 		}
+		/* else */
+
+		offs = ptr - chunk->blocks->frame;
 
 		offs >>= UNIV_PAGE_SIZE_SHIFT;
 
-		if (UNIV_LIKELY((ulint) offs < chunk->size)) {
+		if (UNIV_LIKELY(offs < chunk->size)) {
 			buf_block_t*	block = &chunk->blocks[offs];
 
 			/* The function buf_chunk_init() invokes
@@ -2651,7 +1850,7 @@ buf_page_get_gen(
 	ulint		rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
 	buf_block_t*	guess,	/*!< in: guessed block or NULL */
 	ulint		mode,	/*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
-				BUF_GET_NO_LATCH */
+				BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH */
 	const char*	file,	/*!< in: file name */
 	ulint		line,	/*!< in: line where called */
 	mtr_t*		mtr)	/*!< in: mini-transaction */
@@ -2661,7 +1860,7 @@ buf_page_get_gen(
 	ulint		fix_type;
 	ibool		must_read;
 	ulint		retries = 0;
-	mutex_t*	block_mutex= NULL;
+	mutex_t*	block_mutex = NULL;
 	trx_t*          trx = NULL;
 	ulint           sec;
 	ulint           ms;
@@ -2673,9 +1872,19 @@ buf_page_get_gen(
 	ut_ad((rw_latch == RW_S_LATCH)
 	      || (rw_latch == RW_X_LATCH)
 	      || (rw_latch == RW_NO_LATCH));
-	ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH));
-	ut_ad((mode == BUF_GET) || (mode == BUF_GET_IF_IN_POOL)
-	      || (mode == BUF_GET_NO_LATCH));
+#ifdef UNIV_DEBUG
+	switch (mode) {
+	case BUF_GET_NO_LATCH:
+		ut_ad(rw_latch == RW_NO_LATCH);
+		break;
+	case BUF_GET:
+	case BUF_GET_IF_IN_POOL:
+	case BUF_PEEK_IF_IN_POOL:
+		break;
+	default:
+		ut_error;
+	}
+#endif /* UNIV_DEBUG */
 	ut_ad(zip_size == fil_space_get_zip_size(space));
 	ut_ad(ut_is_2pow(zip_size));
 #ifndef UNIV_LOG_DEBUG
@@ -2693,13 +1902,8 @@ loop:
 		block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
 
 		/* If the guess is a compressed page descriptor that
-		has been allocated by buf_buddy_alloc(), it may have
-		been invalidated by buf_buddy_relocate().  In that
-		case, block could point to something that happens to
-		contain the expected bits in block->page.  Similarly,
-		the guess may be pointing to a buffer pool chunk that
-		has been released when resizing the buffer pool. */
-
+		has been allocated by buf_page_alloc_descriptor(),
+		it may have been freed by buf_relocate(). */
 		if (!block_mutex) {
 			block = guess = NULL;
 		} else if (!buf_block_is_uncompressed(block)
@@ -2720,6 +1924,27 @@ loop:
 		rw_lock_s_lock(&page_hash_latch);
 		block = (buf_block_t*) buf_page_hash_get(space, offset);
 		if (block) {
+			if (UNIV_UNLIKELY(block->page.space_was_being_deleted)) {
+				/* This page is obsolete; discard it and retry. */
+				rw_lock_s_unlock(&page_hash_latch);
+
+				mutex_enter(&LRU_list_mutex);
+				block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
+
+				if (UNIV_UNLIKELY(!block_mutex)) {
+					mutex_exit(&LRU_list_mutex);
+					goto loop;
+				}
+
+				buf_LRU_free_block((buf_page_t*)block, TRUE, TRUE);
+
+				mutex_exit(&LRU_list_mutex);
+				mutex_exit(block_mutex);
+				block_mutex = NULL;
+
+				goto loop;
+			}
+
 			block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
 			ut_a(block_mutex);
 		}
@@ -2732,7 +1957,8 @@ loop2:
 
 		//buf_pool_mutex_exit();
 
-		if (mode == BUF_GET_IF_IN_POOL) {
+		if (mode == BUF_GET_IF_IN_POOL
+		    || mode == BUF_PEEK_IF_IN_POOL) {
 
 			return(NULL);
 		}
@@ -2771,7 +1997,8 @@ loop2:
 
 	must_read = buf_block_get_io_fix(block) == BUF_IO_READ;
 
-	if (must_read && mode == BUF_GET_IF_IN_POOL) {
+	if (must_read && (mode == BUF_GET_IF_IN_POOL
+			  || mode == BUF_PEEK_IF_IN_POOL)) {
 		/* The page is only being read to buffer */
 		//buf_pool_mutex_exit();
 		mutex_exit(block_mutex);
@@ -2779,13 +2006,12 @@ loop2:
 		return(NULL);
 	}
 
-	if (srv_pass_corrupt_table) {
+	if (srv_pass_corrupt_table <= 1) {
 		if (block->page.is_corrupt) {
 			mutex_exit(block_mutex);
 			return(NULL);
 		}
 	}
-	ut_a(!(block->page.is_corrupt));
 
 	switch (buf_block_get_state(block)) {
 		buf_page_t*	bpage;
@@ -2897,8 +2123,10 @@ wait_until_unfixed:
 
 		if (buf_page_get_state(&block->page)
 		    == BUF_BLOCK_ZIP_PAGE) {
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 			UT_LIST_REMOVE(zip_list, buf_pool->zip_clean,
 				       &block->page);
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 			ut_ad(!block->page.in_flush_list);
 		} else {
 			/* Relocate buf_pool->flush_list. */
@@ -2931,10 +2159,10 @@ wait_until_unfixed:
 		buf_pool->n_pend_unzip++;
 		mutex_exit(&buf_pool_mutex);
 
-		buf_buddy_free(bpage, sizeof *bpage, FALSE);
-
 		//buf_pool_mutex_exit();
 
+		buf_page_free_descriptor(bpage);
+
 		/* Decompress the page and apply buffered operations
 		while not holding buf_pool_mutex or block->mutex. */
 		success = buf_zip_decompress(block, srv_use_checksums);
@@ -2981,9 +2209,9 @@ wait_until_unfixed:
 		/* Try to evict the block from the buffer pool, to use the
 		insert buffer as much as possible. */
 
-		if (buf_LRU_free_block(&block->page, TRUE, FALSE) == BUF_LRU_FREED) {
-			buf_pool_mutex_exit();
-			mutex_exit(&block->mutex);
+		if (buf_LRU_free_block(&block->page, TRUE, FALSE)) {
+			//buf_pool_mutex_exit();
+			mutex_exit(block_mutex);
 			fprintf(stderr,
 				"innodb_change_buffering_debug evict %u %u\n",
 				(unsigned) space, (unsigned) offset);
@@ -3011,7 +2239,9 @@ wait_until_unfixed:
 	//buf_pool_mutex_exit();
 	mutex_exit(block_mutex);
 
-	buf_page_set_accessed_make_young(&block->page, access_time);
+	if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL)) {
+		buf_page_set_accessed_make_young(&block->page, access_time);
+	}
 
 #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
 	ut_a(!block->page.file_page_was_freed);
@@ -3077,7 +2307,7 @@ wait_until_unfixed:
 
 	mtr_memo_push(mtr, block, fix_type);
 
-	if (!access_time) {
+	if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL) && !access_time) {
 		/* In the case of a first access, try to apply linear
 		read-ahead */
 
@@ -3531,6 +2761,7 @@ buf_page_init_for_read(
 {
 	buf_block_t*	block;
 	buf_page_t*	bpage;
+	buf_page_t*	bpage_in_bp;
 	mtr_t		mtr;
 	ibool		lru	= FALSE;
 	void*		data;
@@ -3566,11 +2797,29 @@ buf_page_init_for_read(
 		ut_ad(block);
 	}
 
+retry:
 	//buf_pool_mutex_enter();
 	mutex_enter(&LRU_list_mutex);
 	rw_lock_x_lock(&page_hash_latch);
 
-	if (buf_page_hash_get(space, offset)) {
+	bpage_in_bp = buf_page_hash_get(space, offset);
+
+	if (UNIV_UNLIKELY(bpage_in_bp && bpage_in_bp->space_was_being_deleted)) {
+		mutex_t*	block_mutex = buf_page_get_mutex_enter(bpage_in_bp);
+
+		/* This page is obsolete; discard it and retry. */
+		rw_lock_x_unlock(&page_hash_latch);
+		ut_a(block_mutex);
+
+		buf_LRU_free_block(bpage_in_bp, TRUE, TRUE);
+
+		mutex_exit(&LRU_list_mutex);
+		mutex_exit(block_mutex);
+
+		goto retry;
+	}
+
+	if (bpage_in_bp) {
 		/* The page is already in the buffer pool. */
 err_exit:
 		if (block) {
@@ -3648,17 +2897,12 @@ err_exit:
 		mutex_exit(&LRU_list_mutex);
 		mutex_exit(&block->mutex);
 	} else {
-		/* Defer buf_buddy_alloc() until after the block has
-		been found not to exist.  The buf_buddy_alloc() and
-		buf_buddy_free() calls may be expensive because of
-		buf_buddy_relocate(). */
 
 		/* The compressed page must be allocated before the
 		control block (bpage), in order to avoid the
 		invocation of buf_buddy_relocate_block() on
 		uninitialized data. */
 		data = buf_buddy_alloc(zip_size, &lru, TRUE);
-		bpage = buf_buddy_alloc(sizeof *bpage, &lru, TRUE);
 
 		/* If buf_buddy_alloc() allocated storage from the LRU list,
 		it released and reacquired buf_pool_mutex.  Thus, we must
@@ -3666,17 +2910,16 @@ err_exit:
 		if (UNIV_UNLIKELY(lru)
 		    && UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) {
 
-			/* The block was added by some other thread. */
-			buf_buddy_free(bpage, sizeof *bpage, TRUE);
 			buf_buddy_free(data, zip_size, TRUE);
 
 			mutex_exit(&LRU_list_mutex);
 			rw_lock_x_unlock(&page_hash_latch);
-
 			bpage = NULL;
 			goto func_exit;
 		}
 
+		bpage = buf_page_alloc_descriptor();
+
 		page_zip_des_init(&bpage->zip);
 		page_zip_set_size(&bpage->zip, zip_size);
 		bpage->zip.data = data;
@@ -3706,9 +2949,11 @@ err_exit:
 
 		/* The block must be put to the LRU list, to the old blocks */
 		buf_LRU_add_block(bpage, TRUE/* to old blocks */);
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 		mutex_enter(&flush_list_mutex);
 		buf_LRU_insert_zip_clean(bpage);
 		mutex_exit(&flush_list_mutex);
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
 		mutex_exit(&LRU_list_mutex);
 
@@ -3759,12 +3004,28 @@ buf_page_create(
 
 	free_block = buf_LRU_get_free_block();
 
+retry:
 	//buf_pool_mutex_enter();
 	mutex_enter(&LRU_list_mutex);
 	rw_lock_x_lock(&page_hash_latch);
 
 	block = (buf_block_t*) buf_page_hash_get(space, offset);
 
+	if (UNIV_UNLIKELY(block && block->page.space_was_being_deleted)) {
+		mutex_t*	block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
+
+		/* This page is obsolete; discard it and retry. */
+		rw_lock_x_unlock(&page_hash_latch);
+		ut_a(block_mutex);
+
+		buf_LRU_free_block((buf_page_t*)block, TRUE, TRUE);
+
+		mutex_exit(&LRU_list_mutex);
+		mutex_exit(block_mutex);
+
+		goto retry;
+	}
+
 	if (block && buf_page_in_file(&block->page)) {
 #ifdef UNIV_IBUF_COUNT_DEBUG
 		ut_a(ibuf_count_get(space, offset) == 0);
@@ -3889,8 +3150,7 @@ UNIV_INTERN
 void
 buf_page_io_complete(
 /*=================*/
-	buf_page_t*	bpage,	/*!< in: pointer to the block in question */
-	trx_t*		trx)
+	buf_page_t*	bpage)	/*!< in: pointer to the block in question */
 {
 	enum buf_io_fix	io_type;
 	const ibool	uncompressed = (buf_page_get_state(bpage)
@@ -4011,14 +3271,18 @@ corrupt:
 
 			if (srv_pass_corrupt_table && !trx_sys_sys_space(bpage->space)
 			    && bpage->space < SRV_LOG_SPACE_FIRST_ID) {
+				trx_t*	trx;
+
 				fprintf(stderr,
 					"InnoDB: space %u will be treated as corrupt.\n",
 					bpage->space);
 				fil_space_set_corrupt(bpage->space);
-				if (trx && trx->dict_operation_lock_mode == 0) {
-					dict_table_set_corrupt_by_space(bpage->space, TRUE);
-				} else {
+
+				trx = innobase_get_trx();
+				if (trx && trx->dict_operation_lock_mode == RW_X_LATCH) {
 					dict_table_set_corrupt_by_space(bpage->space, FALSE);
+				} else {
+					dict_table_set_corrupt_by_space(bpage->space, TRUE);
 				}
 				bpage->is_corrupt = TRUE;
 			} else
@@ -4781,12 +4045,16 @@ buf_print_io(
 
 	/* Statistics about read ahead algorithm */
 	fprintf(file, "Pages read ahead %.2f/s,"
-		" evicted without access %.2f/s\n",
+		" evicted without access %.2f/s,"
+		" Random read ahead %.2f/s\n",
 		(buf_pool->stat.n_ra_pages_read
 		- buf_pool->old_stat.n_ra_pages_read)
 		/ time_elapsed,
 		(buf_pool->stat.n_ra_pages_evicted
 		- buf_pool->old_stat.n_ra_pages_evicted)
+		/ time_elapsed,
+		(buf_pool->stat.n_ra_pages_read_rnd
+		- buf_pool->old_stat.n_ra_pages_read_rnd)
 		/ time_elapsed);
 
 	/* Print some values to help us with visualizing what is
diff --git a/storage/xtradb/buf/buf0flu.c b/storage/xtradb/buf/buf0flu.c
index cda8d3b170e..0ea3ed29d2b 100644
--- a/storage/xtradb/buf/buf0flu.c
+++ b/storage/xtradb/buf/buf0flu.c
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -459,7 +459,9 @@ buf_flush_remove(
 	case BUF_BLOCK_ZIP_DIRTY:
 		buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
 		UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 		buf_LRU_insert_zip_clean(bpage);
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 		break;
 	case BUF_BLOCK_FILE_PAGE:
 		UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
@@ -769,7 +771,7 @@ corrupted_page:
 flush:
 	/* Now flush the doublewrite buffer data to disk */
 
-	fil_flush(srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE);
+	fil_flush(srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE, FALSE);
 
 	/* We know that the writes have been flushed to disk now
 	and in recovery we will find them in the doublewrite buffer
@@ -1084,7 +1086,7 @@ buf_flush_page_try(
 /*===============*/
 	buf_block_t*	block)		/*!< in/out: buffer control block */
 {
-	ut_ad(buf_pool_mutex_own());
+	//ut_ad(buf_pool_mutex_own());
 	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 	ut_ad(mutex_own(&block->mutex));
 
@@ -1092,8 +1094,11 @@ buf_flush_page_try(
 		return(FALSE);
 	}
 
+	buf_pool_mutex_enter();
+
 	if (buf_pool->n_flush[BUF_FLUSH_LRU] > 0
 	    || buf_pool->init_flush[BUF_FLUSH_LRU]) {
+		buf_pool_mutex_exit();
 		/* There is already a flush batch of the same type running */
 		return(FALSE);
 	}
diff --git a/storage/xtradb/buf/buf0lru.c b/storage/xtradb/buf/buf0lru.c
index 583eec9bd9c..df74ccf500f 100644
--- a/storage/xtradb/buf/buf0lru.c
+++ b/storage/xtradb/buf/buf0lru.c
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -259,18 +259,20 @@ buf_LRU_drop_page_hash_for_tablespace(
 			     * BUF_LRU_DROP_SEARCH_HASH_SIZE);
 	//buf_pool_mutex_enter();
 	mutex_enter(&LRU_list_mutex);
+	num_entries = 0;
 
 scan_again:
-	num_entries = 0;
 	bpage = UT_LIST_GET_LAST(buf_pool->LRU);
 
 	while (bpage != NULL) {
+		/* In XtraDB, bpage->state, space, io_fix and buf_fix_count are protected by block_mutex. */
 		mutex_t*	block_mutex = buf_page_get_mutex_enter(bpage);
 		buf_page_t*	prev_bpage;
+		ibool		is_fixed;
 
 		prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
 
-		if (!block_mutex) {
+		if (UNIV_UNLIKELY(!block_mutex)) {
 			goto next_page;
 		}
 
@@ -278,59 +280,77 @@ scan_again:
 
 		if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
 		    || bpage->space != id
-		    || bpage->buf_fix_count > 0
 		    || bpage->io_fix != BUF_IO_NONE) {
-			/* We leave the fixed pages as is in this scan.
-			To be dealt with later in the final scan. */
+			/* Compressed pages are never hashed.
+			Skip blocks of other tablespaces.
+			Skip I/O-fixed blocks (to be dealt with later). */
 			mutex_exit(block_mutex);
-			goto next_page;
+next_page:
+			bpage = prev_bpage;
+			continue;
 		}
 
-		if (((buf_block_t*) bpage)->is_hashed) {
+		//mutex_enter(&((buf_block_t*) bpage)->mutex);
+		is_fixed = bpage->buf_fix_count > 0
+			|| !((buf_block_t*) bpage)->is_hashed;
+		//mutex_exit(&((buf_block_t*) bpage)->mutex);
 
-			/* Store the offset(i.e.: page_no) in the array
-			so that we can drop hash index in a batch
-			later. */
-			page_arr[num_entries] = bpage->offset;
+		if (is_fixed) {
 			mutex_exit(block_mutex);
-			ut_a(num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE);
-			++num_entries;
+			goto next_page;
+		}
 
-			if (num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE) {
-				goto next_page;
-			}
-			/* Array full. We release the buf_pool_mutex to
-			obey the latching order. */
-			//buf_pool_mutex_exit();
-			mutex_exit(&LRU_list_mutex);
+		/* Store the page number so that we can drop the hash
+		index in a batch later. */
+		page_arr[num_entries] = bpage->offset;
+		mutex_exit(block_mutex);
 
-			buf_LRU_drop_page_hash_batch(id, zip_size, page_arr,
-						     num_entries);
-			num_entries = 0;
-			//buf_pool_mutex_enter();
-			mutex_enter(&LRU_list_mutex);
-		} else {
-			mutex_exit(block_mutex);
+		ut_a(num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE);
+		++num_entries;
+
+		if (num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE) {
+			goto next_page;
 		}
 
-next_page:
-		/* Note that we may have released the buf_pool mutex
-		above after reading the prev_bpage during processing
-		of a page_hash_batch (i.e.: when the array was full).
-		This means that prev_bpage can change in LRU list.
-		This is OK because this function is a 'best effort'
-		to drop as many search hash entries as possible and
-		it does not guarantee that ALL such entries will be
-		dropped. */
-		bpage = prev_bpage;
+		/* Array full. We release the buf_pool_mutex to
+		obey the latching order. */
+		//buf_pool_mutex_exit();
+		mutex_exit(&LRU_list_mutex);
+		buf_LRU_drop_page_hash_batch(id, zip_size, page_arr,
+					     num_entries);
+		//buf_pool_mutex_enter();
+		mutex_enter(&LRU_list_mutex);
+		num_entries = 0;
+
+		/* Note that we released the buf_pool mutex above
+		after reading the prev_bpage during processing of a
+		page_hash_batch (i.e.: when the array was full).
+		Because prev_bpage could belong to a compressed-only
+		block, it may have been relocated, and thus the
+		pointer cannot be trusted. Because bpage is of type
+		buf_block_t, it is safe to dereference.
+
+		bpage can change in the LRU list. This is OK because
+		this function is a 'best effort' to drop as many
+		search hash entries as possible and it does not
+		guarantee that ALL such entries will be dropped. */
 
 		/* If, however, bpage has been removed from LRU list
 		to the free list then we should restart the scan.
 		bpage->state is protected by buf_pool mutex. */
-		if (bpage && !buf_page_in_file(bpage)) {
-			ut_a(num_entries == 0);
+
+		/* Reacquire block_mutex to avoid a race on bpage->state. */
+		block_mutex = buf_page_get_mutex_enter(bpage);
+		if (!block_mutex) {
 			goto scan_again;
 		}
+
+		if (bpage
+		    && buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
+			mutex_exit(block_mutex);
+			goto scan_again;
+		}
+		mutex_exit(block_mutex);
 	}
 
 	//buf_pool_mutex_exit();
@@ -372,7 +392,7 @@ scan_again:
 
 	while (bpage != NULL) {
 		buf_page_t*	prev_bpage;
-		ibool		prev_bpage_buf_fix = FALSE;
+		mutex_t*	block_mutex = NULL;
 
 		ut_a(buf_page_in_file(bpage));
 
@@ -385,26 +405,28 @@ scan_again:
 		if (buf_page_get_space(bpage) != id) {
 			/* Skip this block, as it does not belong to
 			the space that is being invalidated. */
+			goto next_page;
 		} else if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
 			/* We cannot remove this page during this scan
 			yet; maybe the system is currently reading it
 			in, or flushing the modifications to the file */
 
 			all_freed = FALSE;
+			goto next_page;
 		} else {
-			mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
+			block_mutex = buf_page_get_mutex_enter(bpage);
 
 			if (!block_mutex) {
 				/* It may be impossible case...
 				Something wrong, so will be scan_again */
 
 				all_freed = FALSE;
-
-				goto next_page_no_mutex;
+				goto next_page;
 			}
 
 			if (bpage->buf_fix_count > 0) {
 
+				mutex_exit(block_mutex);
 				/* We cannot remove this page during
 				this scan yet; maybe the system is
 				currently reading it in, or flushing
@@ -414,108 +436,61 @@ scan_again:
 
 				goto next_page;
 			}
+		}
+
+		ut_ad(mutex_own(block_mutex));
 
 #ifdef UNIV_DEBUG
-			if (buf_debug_prints) {
-				fprintf(stderr,
-					"Dropping space %lu page %lu\n",
-					(ulong) buf_page_get_space(bpage),
-					(ulong) buf_page_get_page_no(bpage));
-			}
+		if (buf_debug_prints) {
+			fprintf(stderr,
+				"Dropping space %lu page %lu\n",
+				(ulong) buf_page_get_space(bpage),
+				(ulong) buf_page_get_page_no(bpage));
+		}
 #endif
-			if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
-				/* This is a compressed-only block
-				descriptor.  Ensure that prev_bpage
-				cannot be relocated when bpage is freed. */
-				if (UNIV_LIKELY(prev_bpage != NULL)) {
-					switch (buf_page_get_state(
-							prev_bpage)) {
-					case BUF_BLOCK_FILE_PAGE:
-						/* Descriptors of uncompressed
-						blocks will not be relocated,
-						because we are holding the
-						buf_pool_mutex. */
-						break;
-					case BUF_BLOCK_ZIP_PAGE:
-					case BUF_BLOCK_ZIP_DIRTY:
-						/* Descriptors of compressed-
-						only blocks can be relocated,
-						unless they are buffer-fixed.
-						Because both bpage and
-						prev_bpage are protected by
-						buf_pool_zip_mutex, it is
-						not necessary to acquire
-						further mutexes. */
-						ut_ad(&buf_pool_zip_mutex
-						      == block_mutex);
-						ut_ad(mutex_own(block_mutex));
-						prev_bpage_buf_fix = TRUE;
-						prev_bpage->buf_fix_count++;
-						break;
-					default:
-						ut_error;
-					}
-				}
-			} else if (((buf_block_t*) bpage)->is_hashed) {
-				ulint	page_no;
-				ulint	zip_size;
-
-				//buf_pool_mutex_exit();
-				mutex_exit(&LRU_list_mutex);
-				rw_lock_x_unlock(&page_hash_latch);
+		if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
+			/* This is a compressed-only block
+			descriptor. Do nothing. */
+		} else if (((buf_block_t*) bpage)->is_hashed) {
+			ulint	page_no;
+			ulint	zip_size;
 
-				zip_size = buf_page_get_zip_size(bpage);
-				page_no = buf_page_get_page_no(bpage);
+			//buf_pool_mutex_exit();
+			mutex_exit(&LRU_list_mutex);
+			rw_lock_x_unlock(&page_hash_latch);
 
-				mutex_exit(block_mutex);
+			zip_size = buf_page_get_zip_size(bpage);
+			page_no = buf_page_get_page_no(bpage);
 
-				/* Note that the following call will acquire
-				an S-latch on the page */
+			mutex_exit(block_mutex);
 
-				btr_search_drop_page_hash_when_freed(
-					id, zip_size, page_no);
-				goto scan_again;
-			}
+			/* Note that the following call will acquire
+			an S-latch on the page */
 
-			if (bpage->oldest_modification != 0) {
+			btr_search_drop_page_hash_when_freed(
+				id, zip_size, page_no);
+			goto scan_again;
+		}
 
-				buf_flush_remove(bpage);
-			}
+		if (bpage->oldest_modification != 0) {
 
-			/* Remove from the LRU list. */
+			buf_flush_remove(bpage);
+		}
 
-			if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
-			    != BUF_BLOCK_ZIP_FREE) {
-				buf_LRU_block_free_hashed_page((buf_block_t*)
-							       bpage, TRUE);
-			} else {
-				/* The block_mutex should have been
-				released by buf_LRU_block_remove_hashed_page()
-				when it returns BUF_BLOCK_ZIP_FREE. */
-				ut_ad(block_mutex == &buf_pool_zip_mutex);
-				ut_ad(!mutex_own(block_mutex));
-
-				if (prev_bpage_buf_fix) {
-					/* We temporarily buffer-fixed
-					prev_bpage, so that
-					buf_buddy_free() could not
-					relocate it, in case it was a
-					compressed-only block
-					descriptor. */
-
-					mutex_enter(block_mutex);
-					ut_ad(prev_bpage->buf_fix_count > 0);
-					prev_bpage->buf_fix_count--;
-					mutex_exit(block_mutex);
-				}
+		/* Remove from the LRU list. */
 
-				goto next_page_no_mutex;
-			}
-next_page:
+		if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
+		    != BUF_BLOCK_ZIP_FREE) {
+			buf_LRU_block_free_hashed_page((buf_block_t*) bpage, TRUE);
 			mutex_exit(block_mutex);
+		} else {
+			/* The block_mutex should have been released
+			by buf_LRU_block_remove_hashed_page() when it
+			returns BUF_BLOCK_ZIP_FREE. */
+			ut_ad(block_mutex == &buf_pool_zip_mutex);
+			ut_ad(!mutex_own(block_mutex));
 		}
-
-next_page_no_mutex:
+next_page:
 		bpage = prev_bpage;
 	}
 
@@ -539,6 +514,8 @@ buf_LRU_mark_space_was_deleted(
 	ulint	id)	/*!< in: space id */
 {
 	buf_page_t*	bpage;
+	buf_chunk_t*	chunk;
+	ulint		i, j;
 
 	mutex_enter(&LRU_list_mutex);
 
@@ -552,8 +529,32 @@ buf_LRU_mark_space_was_deleted(
 	}
 
 	mutex_exit(&LRU_list_mutex);
+
+	rw_lock_s_lock(&btr_search_latch);
+	chunk = buf_pool->chunks;
+	for (i = buf_pool->n_chunks; i--; chunk++) {
+		buf_block_t*	block	= chunk->blocks;
+		for (j = chunk->size; j--; block++) {
+			if (buf_block_get_state(block)
+			    != BUF_BLOCK_FILE_PAGE
+			    || !block->is_hashed
+			    || buf_page_get_space(&block->page) != id) {
+				continue;
+			}
+
+			rw_lock_s_unlock(&btr_search_latch);
+
+			rw_lock_x_lock(&block->lock);
+			btr_search_drop_page_hash_index(block);
+			rw_lock_x_unlock(&block->lock);
+
+			rw_lock_s_lock(&btr_search_latch);
+		}
+	}
+	rw_lock_s_unlock(&btr_search_latch);
 }
 
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 /********************************************************************//**
 Insert a compressed block into buf_pool->zip_clean in the LRU order. */
 UNIV_INTERN
@@ -587,6 +588,7 @@ buf_LRU_insert_zip_clean(
 		UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, bpage);
 	}
 }
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
 /******************************************************************//**
 Try to free an uncompressed page of a compressed block from the unzip
@@ -629,7 +631,7 @@ restart:
 	     UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
 	     block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
 
-		enum buf_lru_free_block_status	freed;
+		ibool freed;
 
 		mutex_enter(&block->mutex);
 		if (!block->in_unzip_LRU_list || !block->page.in_LRU_list
@@ -645,24 +647,9 @@ restart:
 		freed = buf_LRU_free_block(&block->page, FALSE, have_LRU_mutex);
 		mutex_exit(&block->mutex);
 
-		switch (freed) {
-		case BUF_LRU_FREED:
+		if (freed) {
 			return(TRUE);
-
-		case BUF_LRU_CANNOT_RELOCATE:
-			/* If we failed to relocate, try
-			regular LRU eviction. */
-			return(FALSE);
-
-		case BUF_LRU_NOT_FREED:
-			/* The block was buffer-fixed or I/O-fixed.
-			Keep looking. */
-			continue;
 		}
-
-		/* inappropriate return value from
-		buf_LRU_free_block() */
-		ut_error;
 	}
 
 	return(FALSE);
@@ -695,10 +682,9 @@ restart:
 	     UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
 	     bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
 
-		enum buf_lru_free_block_status	freed;
-		unsigned			accessed;
-		mutex_t*			block_mutex
-			= buf_page_get_mutex_enter(bpage);
+		ibool		freed;
+		unsigned	accessed;
+		mutex_t*	block_mutex = buf_page_get_mutex_enter(bpage);
 
 		if (!block_mutex) {
 			goto restart;
@@ -717,8 +703,7 @@ restart:
 		freed = buf_LRU_free_block(bpage, TRUE, have_LRU_mutex);
 		mutex_exit(block_mutex);
 
-		switch (freed) {
-		case BUF_LRU_FREED:
+		if (freed) {
 			/* Keep track of pages that are evicted without
 			ever being accessed. This gives us a measure of
 			the effectiveness of readahead */
@@ -726,21 +711,7 @@ restart:
 				++buf_pool->stat.n_ra_pages_evicted;
 			}
 			return(TRUE);
-
-		case BUF_LRU_NOT_FREED:
-			/* The block was dirty, buffer-fixed, or I/O-fixed.
-			Keep looking. */
-			continue;
-
-		case BUF_LRU_CANNOT_RELOCATE:
-			/* This should never occur, because we
-			want to discard the compressed page too. */
-			break;
 		}
-
-		/* inappropriate return value from
-		buf_LRU_free_block() */
-		ut_error;
 	}
 
 	return(FALSE);
@@ -1457,17 +1428,16 @@ buf_LRU_make_block_old(
 Try to free a block.  If bpage is a descriptor of a compressed-only
 page, the descriptor object will be freed as well.
 
-NOTE: If this function returns BUF_LRU_FREED, it will temporarily
+NOTE: If this function returns TRUE, it will temporarily
 release buf_pool_mutex.  Furthermore, the page frame will no longer be
 accessible via bpage.
 
 The caller must hold buf_pool_mutex and buf_page_get_mutex(bpage) and
 release these two mutexes after the call.  No other
 buf_page_get_mutex() may be held when calling this function.
-@return BUF_LRU_FREED if freed, BUF_LRU_CANNOT_RELOCATE or
-BUF_LRU_NOT_FREED otherwise. */
+@return TRUE if freed, FALSE otherwise. */
 UNIV_INTERN
-enum buf_lru_free_block_status
+ibool
 buf_LRU_free_block(
 /*===============*/
 	buf_page_t*	bpage,	/*!< in: block to be freed */
@@ -1493,7 +1463,7 @@ buf_LRU_free_block(
 	if (!bpage->in_LRU_list || !block_mutex || !buf_page_can_relocate(bpage)) {
 
 		/* Do not free buffer-fixed or I/O-fixed blocks. */
-		return(BUF_LRU_NOT_FREED);
+		return(FALSE);
 	}
 
 	if (bpage->space_was_being_deleted && bpage->oldest_modification != 0) {
@@ -1509,7 +1479,7 @@ buf_LRU_free_block(
 		/* Do not completely free dirty blocks. */
 
 		if (bpage->oldest_modification) {
-			return(BUF_LRU_NOT_FREED);
+			return(FALSE);
 		}
 	} else if (bpage->oldest_modification) {
 		/* Do not completely free dirty blocks. */
@@ -1517,7 +1487,7 @@ buf_LRU_free_block(
 		if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
 			ut_ad(buf_page_get_state(bpage)
 			      == BUF_BLOCK_ZIP_DIRTY);
-			return(BUF_LRU_NOT_FREED);
+			return(FALSE);
 		}
 
 		goto alloc;
@@ -1526,14 +1496,8 @@ buf_LRU_free_block(
 		If it cannot be allocated (without freeing a block
 		from the LRU list), refuse to free bpage. */
 alloc:
-		//buf_pool_mutex_exit_forbid();
-		b = buf_buddy_alloc(sizeof *b, NULL, FALSE);
-		//buf_pool_mutex_exit_allow();
-
-		if (UNIV_UNLIKELY(!b)) {
-			return(BUF_LRU_CANNOT_RELOCATE);
-		}
-
+		b = buf_page_alloc_descriptor();
+		ut_a(b);
 		//memcpy(b, bpage, sizeof *b);
 	}
 
@@ -1563,7 +1527,7 @@ not_freed:
 		if (!have_LRU_mutex)
 			mutex_exit(&LRU_list_mutex);
 		rw_lock_x_unlock(&page_hash_latch);
-		return(BUF_LRU_NOT_FREED);
+		return(FALSE);
 	} else if (zip || !bpage->zip.data) {
 		if (bpage->oldest_modification)
 			goto not_freed;
@@ -1670,7 +1634,9 @@ not_freed:
 
 			mutex_enter(&flush_list_mutex);
 			if (b->state == BUF_BLOCK_ZIP_PAGE) {
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 				buf_LRU_insert_zip_clean(b);
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 			} else {
 				/* Relocate on buf_pool->flush_list. */
 				buf_flush_relocate_on_flush_list(bpage, b);
@@ -1746,7 +1712,7 @@ not_freed:
 		rw_lock_x_unlock(&page_hash_latch);
 	}
 
-	return(BUF_LRU_FREED);
+	return(TRUE);
 }
 
 /******************************************************************//**
@@ -1967,15 +1933,16 @@ buf_LRU_block_remove_hashed_page(
 		ut_a(bpage->zip.data);
 		ut_a(buf_page_get_zip_size(bpage));
 
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 		UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, bpage);
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
 		mutex_exit(&buf_pool_zip_mutex);
 		//buf_pool_mutex_exit_forbid();
 		buf_buddy_free(bpage->zip.data,
 			       page_zip_get_size(&bpage->zip), TRUE);
-		buf_buddy_free(bpage, sizeof(*bpage), TRUE);
 		//buf_pool_mutex_exit_allow();
-		UNIV_MEM_UNDESC(bpage);
+		buf_page_free_descriptor(bpage);
 		return(BUF_BLOCK_ZIP_FREE);
 
 	case BUF_BLOCK_FILE_PAGE:
@@ -2284,6 +2251,11 @@ buf_LRU_file_restore(void)
 			" InnoDB: cannot open %s\n", LRU_DUMP_FILE);
 		goto end;
 	}
+
+	ut_print_timestamp(stderr);
+	fprintf(stderr, " InnoDB: Restoring buffer pool pages from %s\n",
+		LRU_DUMP_FILE);
+
 	if (size == 0 || size_high > 0 || size % 8) {
 		fprintf(stderr, " InnoDB: broken LRU dump file\n");
 		goto end;
@@ -2385,7 +2357,7 @@ buf_LRU_file_restore(void)
 
 	ut_print_timestamp(stderr);
 	fprintf(stderr,
-		" InnoDB: reading pages based on the dumped LRU list was done."
+		" InnoDB: Completed reading buffer pool pages"
 		" (requested: %lu, read: %lu)\n", req, reads);
 	ret = TRUE;
 end:
diff --git a/storage/xtradb/buf/buf0rea.c b/storage/xtradb/buf/buf0rea.c
index 59de70d9a8a..966a9b5d7a6 100644
--- a/storage/xtradb/buf/buf0rea.c
+++ b/storage/xtradb/buf/buf0rea.c
@@ -38,6 +38,14 @@ Created 11/5/1995 Heikki Tuuri
 #include "srv0start.h"
 #include "srv0srv.h"
 
+/** The size in blocks of the area where the random read-ahead algorithm counts
+the accessed pages when deciding whether to read-ahead */
+#define	BUF_READ_AHEAD_RANDOM_AREA	BUF_READ_AHEAD_AREA
+
+/** There must be at least this many pages in buf_pool in the area to start
+a random read-ahead */
+#define BUF_READ_AHEAD_RANDOM_THRESHOLD	(5 + BUF_READ_AHEAD_RANDOM_AREA / 8)
+
 /** The linear read-ahead area size */
 #define	BUF_READ_AHEAD_LINEAR_AREA	BUF_READ_AHEAD_AREA
 
@@ -201,13 +209,179 @@ not_to_recover:
 	if (sync) {
 		/* The i/o is already completed when we arrive from
 		fil_read */
-		buf_page_io_complete(bpage, trx);
+		buf_page_io_complete(bpage);
 	}
 
 	return(1);
 }
 
 /********************************************************************//**
+Applies a random read-ahead in buf_pool if there are at least a threshold
+value of accessed pages from the random read-ahead area. Does not read any
+page, not even the one at the position (space, offset), if the read-ahead
+mechanism is not activated. NOTE 1: the calling thread may own latches on
+pages: to avoid deadlocks this function must be written such that it cannot
+end up waiting for these latches! NOTE 2: the calling thread must want
+access to the page given: this rule is set to prevent unintended read-aheads
+performed by ibuf routines, a situation which could result in a deadlock if
+the OS does not support asynchronous i/o.
+
+The function returns 0 without issuing any read when srv_random_read_ahead
+is not set, when srv_startup_is_before_trx_rollback_phase is set, when
+(space, offset) is an ibuf bitmap page or the trx sys header page, when
+buf_pool->n_pend_reads exceeds buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT,
+or when fewer than BUF_READ_AHEAD_RANDOM_THRESHOLD pages of the area are
+both accessed and young.
+
+The area is the BUF_READ_AHEAD_RANDOM_AREA-aligned page range [low, high)
+that contains offset, with high clipped to fil_space_get_size(space).
+
+When reads are issued, the function also calls buf_LRU_stat_inc_io() once
+and adds the number of requests to buf_pool->stat.n_ra_pages_read_rnd.
+@return number of page read requests issued; NOTE that if we read ibuf
+pages, it may happen that the page at the given page number does not
+get read even if we return a positive value! */
+static
+ulint
+buf_read_ahead_random(
+/*==================*/
+	ulint	space,	/*!< in: space id */
+	ulint	zip_size,/*!< in: compressed page size in bytes, or 0 */
+	ulint	offset,	/*!< in: page number of a page which the current thread
+			wants to access */
+	trx_t*	trx)	/*!< in: trx object, forwarded unchanged to
+			buf_read_page_low() */
+{
+	ib_int64_t	tablespace_version;
+	ulint		recent_blocks	= 0;
+	ulint		count;
+	ulint		ibuf_mode;
+	ulint		low, high;
+	ulint		err;
+	ulint		i;
+	ulint		buf_read_ahead_random_area;
+
+	if (!srv_random_read_ahead) {
+		/* Disabled by user */
+		return(0);
+	}
+
+	if (srv_startup_is_before_trx_rollback_phase) {
+		/* No read-ahead to avoid thread deadlocks */
+		return(0);
+	}
+
+	if (ibuf_bitmap_page(zip_size, offset)
+	    || trx_sys_hdr_page(space, offset)) {
+
+		/* If it is an ibuf bitmap page or trx sys hdr, we do
+		no read-ahead, as that could break the ibuf page access
+		order */
+
+		return(0);
+	}
+
+	/* Remember the tablespace version before we ask the tablespace size
+	below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
+	do not try to read outside the bounds of the tablespace! The saved
+	version is handed to every buf_read_page_low() call further down. */
+
+	tablespace_version = fil_space_get_version(space);
+
+	buf_read_ahead_random_area = BUF_READ_AHEAD_RANDOM_AREA;
+
+	low  = (offset / buf_read_ahead_random_area)
+		* buf_read_ahead_random_area;
+	high = (offset / buf_read_ahead_random_area + 1)
+		* buf_read_ahead_random_area;
+	if (high > fil_space_get_size(space)) {
+
+		high = fil_space_get_size(space);
+	}
+
+	//buf_pool_mutex_enter();
+	mutex_enter(&buf_pool_mutex);
+
+	if (buf_pool->n_pend_reads
+	    > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
+		//buf_pool_mutex_exit();
+		mutex_exit(&buf_pool_mutex);
+
+		return(0);
+	}
+	mutex_exit(&buf_pool_mutex);
+
+	/* Count how many blocks in the area have been recently accessed,
+	that is, reside near the start of the LRU list. The scan runs under
+	an S-lock on page_hash_latch and stops as soon as the count reaches
+	BUF_READ_AHEAD_RANDOM_THRESHOLD; the latch is released before the
+	jump to read_ahead. */
+
+	rw_lock_s_lock(&page_hash_latch);
+	for (i = low; i < high; i++) {
+		const buf_page_t*	bpage = buf_page_hash_get(space, i);
+
+		if (bpage
+		    && buf_page_is_accessed(bpage)
+		    && buf_page_peek_if_young(bpage)) {
+
+			recent_blocks++;
+
+			if (recent_blocks >= BUF_READ_AHEAD_RANDOM_THRESHOLD) {
+
+				//buf_pool_mutex_exit();
+				rw_lock_s_unlock(&page_hash_latch);
+				goto read_ahead;
+			}
+		}
+	}
+
+	//buf_pool_mutex_exit();
+	rw_lock_s_unlock(&page_hash_latch);
+	/* Do nothing */
+	return(0);
+
+read_ahead:
+	/* Read all the suitable blocks within the area. When ibuf_inside()
+	is true the requests are restricted to BUF_READ_IBUF_PAGES_ONLY,
+	otherwise BUF_READ_ANY_PAGE is used. */
+
+	if (ibuf_inside()) {
+		ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
+	} else {
+		ibuf_mode = BUF_READ_ANY_PAGE;
+	}
+
+	count = 0;
+
+	for (i = low; i < high; i++) {
+		/* It is only sensible to do read-ahead in the non-sync aio
+		mode: hence FALSE as the argument right after &err. Ibuf
+		bitmap pages inside the area are skipped. */
+
+		if (!ibuf_bitmap_page(zip_size, i)) {
+			count += buf_read_page_low(
+				&err, FALSE,
+				ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
+				space, zip_size, FALSE,
+				tablespace_version, i, trx);
+			if (err == DB_TABLESPACE_DELETED) {
+				ut_print_timestamp(stderr);
+				fprintf(stderr,
+					"  InnoDB: Warning: in random"
+					" readahead trying to access\n"
+					"InnoDB: tablespace %lu page %lu,\n"
+					"InnoDB: but the tablespace does not"
+					" exist or is just being dropped.\n",
+					(ulong) space, (ulong) i);
+			}
+		}
+	}
+
+	/* In simulated aio we wake the aio handler threads only after
+	queuing all aio requests, in native aio the following call does
+	nothing: */
+
+	os_aio_simulated_wake_handler_threads();
+
+#ifdef UNIV_DEBUG
+	if (buf_debug_prints && (count > 0)) {
+		fprintf(stderr,
+			"Random read-ahead space %lu offset %lu pages %lu\n",
+			(ulong) space, (ulong) offset,
+			(ulong) count);
+	}
+#endif /* UNIV_DEBUG */
+
+	/* Read ahead is considered one I/O operation for the purpose of
+	LRU policy decision. */
+	buf_LRU_stat_inc_io();
+
+	buf_pool->stat.n_ra_pages_read_rnd += count;
+	return(count);
+}
+
+
+/********************************************************************//**
 High-level function which reads a page asynchronously from a file to the
 buffer buf_pool if it is not already there. Sets the io_fix flag and sets
 an exclusive lock on the buffer frame. The flag is cleared and the x-lock
@@ -226,6 +400,9 @@ buf_read_page(
 	ulint		count;
 	ulint		err;
 
+	count = buf_read_ahead_random(space, zip_size, offset, trx);
+	srv_buf_pool_reads += count;
+
 	tablespace_version = fil_space_get_version(space);
 
 	/* We do the i/o in the synchronous aio mode to save thread
diff --git a/storage/xtradb/dict/dict0dict.c b/storage/xtradb/dict/dict0dict.c
index 8d4bd76c32c..6b7e0bcffd2 100644
--- a/storage/xtradb/dict/dict0dict.c
+++ b/storage/xtradb/dict/dict0dict.c
@@ -4527,6 +4527,8 @@ dict_store_statistics(
 			break;
 		}
 
+		btr_pcur_store_position(&pcur, &mtr);
+
 		if (rec_get_deleted_flag(rec, 0)) {
 			/* don't count */
 			i--;
@@ -4567,6 +4569,10 @@ dict_store_statistics(
 		rests--;
 
 next_rec:
+		mtr_commit(&mtr);
+		mtr_start(&mtr);
+		btr_pcur_restore_position(BTR_MODIFY_LEAF, &pcur, &mtr);
+
 		btr_pcur_move_to_next_user_rec(&pcur, &mtr);
 	}
 	btr_pcur_close(&pcur);
@@ -4657,6 +4663,7 @@ dict_update_statistics(
 	do {
 		if (table->is_corrupt) {
 			ut_a(srv_pass_corrupt_table);
+			dict_table_stats_unlock(table, RW_X_LATCH);
 			return;
 		}
 
diff --git a/storage/xtradb/dict/dict0load.c b/storage/xtradb/dict/dict0load.c
index edd77e2530f..64d7fad3557 100644
--- a/storage/xtradb/dict/dict0load.c
+++ b/storage/xtradb/dict/dict0load.c
@@ -554,9 +554,10 @@ dict_load_columns(
 }
 
 /********************************************************************//**
-Loads definitions for index fields. */
+Loads definitions for index fields.
+@return DB_SUCCESS if ok, DB_CORRUPTION if failed */
 static
-void
+ulint
 dict_load_fields(
 /*=============*/
 	dict_index_t*	index,	/*!< in: index whose fields to load */
@@ -575,6 +576,7 @@ dict_load_fields(
 	byte*		buf;
 	ulint		i;
 	mtr_t		mtr;
+	ulint		error = DB_SUCCESS;
 
 	ut_ad(mutex_own(&(dict_sys->mutex)));
 
@@ -641,6 +643,26 @@ dict_load_fields(
 
 		field = rec_get_nth_field_old(rec, 4, &len);
 
+		if (prefix_len >= DICT_MAX_INDEX_COL_LEN) {
+			fprintf(stderr, "InnoDB: Error: load index"
+					" '%s' failed.\n"
+					"InnoDB: index field '%s' has a prefix"
+					" length of %lu bytes,\n"
+					"InnoDB: which exceeds the"
+					" maximum limit of %lu bytes.\n"
+					"InnoDB: Please use server that"
+					" supports long index prefix\n"
+					"InnoDB: or turn on"
+					" innodb_force_recovery to load"
+					" the table\n",
+				index->name, mem_heap_strdupl(
+						heap, (char*) field, len),
+		    		(ulong) prefix_len,
+				(ulong) (DICT_MAX_INDEX_COL_LEN - 1));
+			error = DB_CORRUPTION;
+			goto func_exit;
+		}
+
 		dict_mem_index_add_field(index,
 					 mem_heap_strdupl(heap,
 							  (char*) field, len),
@@ -650,8 +672,10 @@ next_rec:
 		btr_pcur_move_to_next_user_rec(&pcur, &mtr);
 	}
 
+func_exit:
 	btr_pcur_close(&pcur);
 	mtr_commit(&mtr);
+	return(error);
 }
 
 /********************************************************************//**
@@ -802,7 +826,25 @@ dict_load_indexes(
 						      space, type, n_fields);
 			index->id = id;
 
-			dict_load_fields(index, heap);
+			error = dict_load_fields(index, heap);
+
+			if (error != DB_SUCCESS) {
+				fprintf(stderr, "InnoDB: Error: load index '%s'"
+					" for table '%s' failed\n",
+					index->name, table->name);
+
+				/* If the force recovery flag is set, and
+				if the failed index is not the primary index, we
+				will continue and open other indexes */
+				if (srv_force_recovery
+				    && !(index->type & DICT_CLUSTERED)) {
+					error = DB_SUCCESS;
+					goto next_rec;
+				} else {
+					goto func_exit;
+				}
+			}
+
 			error = dict_index_add_to_cache(table, index, page_no,
 							FALSE);
 			/* The data dictionary tables should never contain
@@ -1028,9 +1070,18 @@ err_exit:
 		} else {
 			table->fk_max_recusive_level = 0;
 		}
-	} else if (!srv_force_recovery) {
-		dict_table_remove_from_cache(table);
-		table = NULL;
+	} else {
+		dict_index_t*	index;
+
+		/* Make sure that at least the clustered index was loaded.
+		Otherwise refuse to load the table */
+		index = dict_table_get_first_index(table);
+
+		if (!srv_force_recovery || !index
+		     || !(index->type & DICT_CLUSTERED)) {
+			dict_table_remove_from_cache(table);
+			table = NULL;
+		}
 	}
 #if 0
 	if (err != DB_SUCCESS && table != NULL) {
diff --git a/storage/xtradb/dict/dict0mem.c b/storage/xtradb/dict/dict0mem.c
index f2d219bfd4f..c3da053c2de 100644
--- a/storage/xtradb/dict/dict0mem.c
+++ b/storage/xtradb/dict/dict0mem.c
@@ -36,6 +36,9 @@ Created 1/8/1996 Heikki Tuuri
 #ifndef UNIV_HOTBACKUP
 # include "lock0lock.h"
 #endif /* !UNIV_HOTBACKUP */
+#ifdef UNIV_BLOB_DEBUG
+# include "ut0rbt.h"
+#endif /* UNIV_BLOB_DEBUG */
 
 #define	DICT_HEAP_SIZE		100	/*!< initial memory heap size when
 					creating a table or index object */
@@ -318,6 +321,12 @@ dict_mem_index_free(
 {
 	ut_ad(index);
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+#ifdef UNIV_BLOB_DEBUG
+	if (index->blobs) {
+		mutex_free(&index->blobs_mutex);
+		rbt_free(index->blobs);
+	}
+#endif /* UNIV_BLOB_DEBUG */
 
 	mem_heap_free(index->heap);
 }
diff --git a/storage/xtradb/fil/fil0fil.c b/storage/xtradb/fil/fil0fil.c
index 3c535bf7331..17ca4cb1745 100644
--- a/storage/xtradb/fil/fil0fil.c
+++ b/storage/xtradb/fil/fil0fil.c
@@ -46,6 +46,8 @@ Created 10/25/1995 Heikki Tuuri
 #include "row0mysql.h"
 #include "row0row.h"
 #include "que0que.h"
+#include "btr0btr.h"
+#include "btr0sea.h"
 #ifndef UNIV_HOTBACKUP
 # include "buf0lru.h"
 # include "ibuf0ibuf.h"
@@ -817,7 +819,7 @@ fil_node_close_file(
 	ut_ad(node && system);
 	ut_ad(mutex_own(&(system->mutex)));
 	ut_a(node->open);
-	ut_a(node->n_pending == 0 || srv_lazy_drop_table);
+	ut_a(node->n_pending == 0 || node->space->is_being_deleted);
 	ut_a(node->n_pending_flushes == 0);
 	ut_a(node->modification_counter == node->flush_counter);
 
@@ -830,7 +832,7 @@ fil_node_close_file(
 	ut_a(system->n_open > 0);
 	system->n_open--;
 
-	if (node->space->purpose == FIL_TABLESPACE && !trx_sys_sys_space(node->space->id)) {
+	if (node->n_pending == 0 && node->space->purpose == FIL_TABLESPACE && !trx_sys_sys_space(node->space->id)) {
 		ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
 
 		/* The node is in the LRU list, remove it */
@@ -1029,7 +1031,7 @@ fil_node_free(
 	ut_ad(node && system && space);
 	ut_ad(mutex_own(&(system->mutex)));
 	ut_a(node->magic_n == FIL_NODE_MAGIC_N);
-	ut_a(node->n_pending == 0 || srv_lazy_drop_table);
+	ut_a(node->n_pending == 0 || space->is_being_deleted);
 
 	if (node->open) {
 		/* We fool the assertion in fil_node_close_file() to think
@@ -2579,7 +2581,7 @@ retry:
 
 		os_thread_sleep(20000);
 
-		fil_flush(id);
+		fil_flush(id, TRUE);
 
 		goto retry;
 
@@ -2792,7 +2794,7 @@ error_exit2:
 		goto error_exit;
 	}
 
-	ret = os_file_flush(file);
+	ret = os_file_flush(file, TRUE);
 
 	if (!ret) {
 		fputs("InnoDB: Error: file flush of tablespace ", stderr);
@@ -2977,7 +2979,7 @@ fil_reset_too_high_lsns(
 		}
 	}
 
-	success = os_file_flush(file);
+	success = os_file_flush(file, TRUE);
 	if (!success) {
 
 		goto func_exit;
@@ -2999,7 +3001,7 @@ fil_reset_too_high_lsns(
 
 		goto func_exit;
 	}
-	success = os_file_flush(file);
+	success = os_file_flush(file, TRUE);
 func_exit:
 	os_file_close(file);
 	ut_free(buf2);
@@ -3009,6 +3011,97 @@ func_exit:
 }
 
 /********************************************************************//**
+Checks if a page is corrupt. (for offline page)
+
+The checks are applied in this order:
+1) Uncompressed pages only (zip_size == 0): the 4 bytes at
+   FIL_PAGE_LSN + 4 must equal the 4 bytes at
+   UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM + 4.
+2) Compressed pages (zip_size != 0): the field at FIL_PAGE_SPACE_OR_CHKSUM
+   must be BUF_NO_CHECKSUM_MAGIC or equal to page_zip_calc_checksum();
+   this decides the result and no further check is made.
+3) Uncompressed pages: the field at
+   UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM must equal the 4 bytes
+   read at FIL_PAGE_LSN, BUF_NO_CHECKSUM_MAGIC, or
+   buf_calc_page_old_checksum().
+4) Uncompressed pages: the field at FIL_PAGE_SPACE_OR_CHKSUM must be 0,
+   BUF_NO_CHECKSUM_MAGIC, or buf_calc_page_new_checksum(); when
+   srv_fast_checksum is set, buf_calc_page_new_checksum_32() is accepted
+   as well.
+@return TRUE if one of the checks fails, FALSE otherwise */
+static
+ibool
+fil_page_buf_page_is_corrupted_offline(
+/*===================================*/
+	const byte*	page,		/*!< in: a database page */
+	ulint		zip_size)	/*!< in: size of compressed page;
+					0 for uncompressed pages */
+{
+	ulint		checksum_field;
+	ulint		old_checksum_field;
+
+	if (!zip_size
+	    && memcmp(page + FIL_PAGE_LSN + 4,
+		      page + UNIV_PAGE_SIZE
+		      - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {
+		return(TRUE);
+	}
+
+	checksum_field = mach_read_from_4(page
+					  + FIL_PAGE_SPACE_OR_CHKSUM);
+
+	if (zip_size) {
+		return(checksum_field != BUF_NO_CHECKSUM_MAGIC
+		       && checksum_field
+		       != page_zip_calc_checksum(page, zip_size));
+	}
+
+	old_checksum_field = mach_read_from_4(
+		page + UNIV_PAGE_SIZE
+		- FIL_PAGE_END_LSN_OLD_CHKSUM);
+
+	if (old_checksum_field != mach_read_from_4(page
+						   + FIL_PAGE_LSN)
+	    && old_checksum_field != BUF_NO_CHECKSUM_MAGIC
+	    && old_checksum_field
+	    != buf_calc_page_old_checksum(page)) {
+		return(TRUE);
+	}
+
+	if (!srv_fast_checksum
+	    && checksum_field != 0
+	    && checksum_field != BUF_NO_CHECKSUM_MAGIC
+	    && checksum_field
+	    != buf_calc_page_new_checksum(page)) {
+		return(TRUE);
+	}
+
+	if (srv_fast_checksum
+	    && checksum_field != 0
+	    && checksum_field != BUF_NO_CHECKSUM_MAGIC
+	    && checksum_field
+	    != buf_calc_page_new_checksum_32(page)
+	    && checksum_field
+	    != buf_calc_page_new_checksum(page)) {
+		return(TRUE);
+	}
+
+	return(FALSE);
+}
+
+/********************************************************************//**
+Writes the checksum field(s) of a page image in place.
+
+Uncompressed page (zip_size == 0): first stores at
+FIL_PAGE_SPACE_OR_CHKSUM the value of buf_calc_page_new_checksum(), or of
+buf_calc_page_new_checksum_32() when srv_fast_checksum is set; then stores
+buf_calc_page_old_checksum() at
+UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM.
+
+Compressed page (zip_size != 0): stores page_zip_calc_checksum() at
+FIL_PAGE_SPACE_OR_CHKSUM only.
+
+When srv_use_checksums is not set, BUF_NO_CHECKSUM_MAGIC is stored in
+place of every computed checksum. */
+static
+void
+fil_page_buf_page_store_checksum(
+/*=============================*/
+	byte*	page,	/*!< in/out: page image whose checksum field(s)
+			are overwritten */
+	ulint	zip_size)	/*!< in: compressed page size, or 0 to
+				select the uncompressed page layout */
+{
+	if (!zip_size) {
+		mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
+				srv_use_checksums
+				? (!srv_fast_checksum
+				   ? buf_calc_page_new_checksum(page)
+				   : buf_calc_page_new_checksum_32(page))
+						: BUF_NO_CHECKSUM_MAGIC);
+		mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
+				srv_use_checksums
+				? buf_calc_page_old_checksum(page)
+						: BUF_NO_CHECKSUM_MAGIC);
+	} else {
+		mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
+				srv_use_checksums
+				? page_zip_calc_checksum(page, zip_size)
+				: BUF_NO_CHECKSUM_MAGIC);
+	}
+}
+
+/********************************************************************//**
 Tries to open a single-table tablespace and optionally checks the space id is
 right in it. If does not succeed, prints an error message to the .err log. This
 function is used to open a tablespace when we start up mysqld, and also in
@@ -3123,6 +3216,7 @@ fil_open_single_table_tablespace(
 		fil_system_t*	system;
 		fil_node_t*	node = NULL;
 		fil_space_t*	space;
+		ulint		zip_size;
 
 		buf3 = ut_malloc(2 * UNIV_PAGE_SIZE);
 		descr_page = ut_align(buf3, UNIV_PAGE_SIZE);
@@ -3140,12 +3234,15 @@ fil_open_single_table_tablespace(
 		/* store as first descr page */
 		memcpy(descr_page, page, UNIV_PAGE_SIZE);
 
+		zip_size = dict_table_flags_to_zip_size(flags);
+		ut_a(zip_size == dict_table_flags_to_zip_size(space_flags));
+
 		/* get free limit (page number) of the table space */
 /* these should be same to the definition in fsp0fsp.c */
 #define FSP_HEADER_OFFSET	FIL_PAGE_DATA
 #define	FSP_FREE_LIMIT		12
 		free_limit = mach_read_from_4(FSP_HEADER_OFFSET + FSP_FREE_LIMIT + page);
-		free_limit_bytes = (ib_int64_t)free_limit * (ib_int64_t)UNIV_PAGE_SIZE;
+		free_limit_bytes = (ib_int64_t)free_limit * (ib_int64_t)(zip_size ? zip_size : UNIV_PAGE_SIZE);
 
 		/* overwrite fsp header */
 		fsp_header_init_fields(page, id, flags);
@@ -3154,16 +3251,9 @@ fil_open_single_table_tablespace(
 		space_flags = flags;
 		if (mach_read_ull(page + FIL_PAGE_FILE_FLUSH_LSN) > current_lsn)
 			mach_write_ull(page + FIL_PAGE_FILE_FLUSH_LSN, current_lsn);
-		mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
-				srv_use_checksums
-				? (!srv_fast_checksum
-				   ? buf_calc_page_new_checksum(page)
-				   : buf_calc_page_new_checksum_32(page))
-						: BUF_NO_CHECKSUM_MAGIC);
-		mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
-				srv_use_checksums
-				? buf_calc_page_old_checksum(page)
-						: BUF_NO_CHECKSUM_MAGIC);
+
+		fil_page_buf_page_store_checksum(page, zip_size);
+
 		success = os_file_write(filepath, file, page, 0, 0, UNIV_PAGE_SIZE);
 
 		/* get file size */
@@ -3173,7 +3263,7 @@ fil_open_single_table_tablespace(
 
 		if (size_bytes < free_limit_bytes) {
 			free_limit_bytes = size_bytes;
-			if (size_bytes >= (ib_int64_t) (FSP_EXTENT_SIZE * UNIV_PAGE_SIZE)) {
+			if (size_bytes >= (ib_int64_t) (FSP_EXTENT_SIZE * (zip_size ? zip_size : UNIV_PAGE_SIZE))) {
 				fprintf(stderr, "InnoDB: free limit of %s is larger than its real size.\n", filepath);
 				file_is_corrupt = TRUE;
 			}
@@ -3237,75 +3327,41 @@ skip_info:
 			size_bytes = ut_2pow_round(size_bytes, 1024 * 1024);
 		}
 		*/
-		if (!(flags & DICT_TF_ZSSIZE_MASK)) {
+
+		if (zip_size) {
+			fprintf(stderr, "InnoDB: Warning: importing compressed table is still EXPERIMENTAL, currently.\n");
+		}
+
+		{
 			mem_heap_t*	heap = NULL;
 			ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 			ulint*		offsets = offsets_;
 			ib_int64_t	offset;
 
-			size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
+			size = (ulint) (size_bytes / (zip_size ? zip_size : UNIV_PAGE_SIZE));
 			/* over write space id of all pages */
 			rec_offs_init(offsets_);
 
 			fprintf(stderr, "InnoDB: Progress in %%:");
 
-			for (offset = 0; offset < free_limit_bytes; offset += UNIV_PAGE_SIZE) {
-				ulint		checksum_field;
-				ulint		old_checksum_field;
+			for (offset = 0; offset < free_limit_bytes;
+			     offset += zip_size ? zip_size : UNIV_PAGE_SIZE) {
 				ibool		page_is_corrupt;
 
 				success = os_file_read(file, page,
 							(ulint)(offset & 0xFFFFFFFFUL),
-							(ulint)(offset >> 32), UNIV_PAGE_SIZE);
+							(ulint)(offset >> 32),
+							zip_size ? zip_size : UNIV_PAGE_SIZE);
 
 				page_is_corrupt = FALSE;
 
 				/* check consistency */
-				if (memcmp(page + FIL_PAGE_LSN + 4,
-					   page + UNIV_PAGE_SIZE
-					   - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {
-
+				if (fil_page_buf_page_is_corrupted_offline(page, zip_size)) {
 					page_is_corrupt = TRUE;
 				}
 
 				if (mach_read_from_4(page + FIL_PAGE_OFFSET)
-				    != offset / UNIV_PAGE_SIZE) {
-
-					page_is_corrupt = TRUE;
-				}
-
-				checksum_field = mach_read_from_4(page
-								  + FIL_PAGE_SPACE_OR_CHKSUM);
-
-				old_checksum_field = mach_read_from_4(
-					page + UNIV_PAGE_SIZE
-					- FIL_PAGE_END_LSN_OLD_CHKSUM);
-
-				if (old_checksum_field != mach_read_from_4(page
-									   + FIL_PAGE_LSN)
-				    && old_checksum_field != BUF_NO_CHECKSUM_MAGIC
-				    && old_checksum_field
-				    != buf_calc_page_old_checksum(page)) {
-
-					page_is_corrupt = TRUE;
-				}
-
-				if (!srv_fast_checksum
-				    && checksum_field != 0
-				    && checksum_field != BUF_NO_CHECKSUM_MAGIC
-				    && checksum_field
-				    != buf_calc_page_new_checksum(page)) {
-
-					page_is_corrupt = TRUE;
-				}
-
-				if (srv_fast_checksum
-				    && checksum_field != 0
-				    && checksum_field != BUF_NO_CHECKSUM_MAGIC
-				    && checksum_field
-				    != buf_calc_page_new_checksum_32(page)
-				    && checksum_field
-				    != buf_calc_page_new_checksum(page)) {
+				    != offset / (zip_size ? zip_size : UNIV_PAGE_SIZE)) {
 
 					page_is_corrupt = TRUE;
 				}
@@ -3316,7 +3372,8 @@ skip_info:
 					/* it should be overwritten already */
 					ut_a(!page_is_corrupt);
 
-				} else if (!((offset / UNIV_PAGE_SIZE) % UNIV_PAGE_SIZE)) {
+				} else if (!((offset / (zip_size ? zip_size : UNIV_PAGE_SIZE))
+					     % (zip_size ? zip_size : UNIV_PAGE_SIZE))) {
 					/* descr page (not header) */
 					if (page_is_corrupt) {
 						file_is_corrupt = TRUE;
@@ -3327,7 +3384,7 @@ skip_info:
 					}
 
 					/* store as descr page */
-					memcpy(descr_page, page, UNIV_PAGE_SIZE);
+					memcpy(descr_page, page, (zip_size ? zip_size : UNIV_PAGE_SIZE));
 
 				} else if (descr_is_corrupt) {
 					/* unknown state of the page */
@@ -3355,9 +3412,12 @@ skip_info:
 					ulint	bit_index;
 
 					descr = descr_page + XDES_ARR_OFFSET
-						+ XDES_SIZE * (ut_2pow_remainder((offset / UNIV_PAGE_SIZE), UNIV_PAGE_SIZE) / FSP_EXTENT_SIZE);
+						+ XDES_SIZE * (ut_2pow_remainder(
+							(offset / (zip_size ? zip_size : UNIV_PAGE_SIZE)),
+							(zip_size ? zip_size : UNIV_PAGE_SIZE)) / FSP_EXTENT_SIZE);
 
-					index = XDES_FREE_BIT + XDES_BITS_PER_PAGE * ((offset / UNIV_PAGE_SIZE) % FSP_EXTENT_SIZE);
+					index = XDES_FREE_BIT
+						+ XDES_BITS_PER_PAGE * ((offset / (zip_size ? zip_size : UNIV_PAGE_SIZE)) % FSP_EXTENT_SIZE);
 					byte_index = index / 8;
 					bit_index = index % 8;
 
@@ -3375,7 +3435,7 @@ skip_info:
 				}
 
 				if (page_is_corrupt) {
-                                  fprintf(stderr, " [errp:%ld]", (long) (offset / UNIV_PAGE_SIZE));
+					fprintf(stderr, " [errp:%ld]", (long) (offset / (zip_size ? zip_size : UNIV_PAGE_SIZE)));
 
 					/* cannot treat corrupt page */
 					goto skip_write;
@@ -3385,7 +3445,13 @@ skip_info:
 					mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, id);
 
 					for (i = 0; (ulint) i < n_index; i++) {
-                                                if ((ulint) (offset / UNIV_PAGE_SIZE) == root_page[i]) {
+						if ((ulint) (offset / (zip_size ? zip_size : UNIV_PAGE_SIZE)) == root_page[i]) {
+							if (fil_page_get_type(page) != FIL_PAGE_INDEX) {
+								file_is_corrupt = TRUE;
+								fprintf(stderr, " [etyp:%ld]",
+									(long) (offset / (zip_size ? zip_size : UNIV_PAGE_SIZE)));
+								goto skip_write;
+							}
 							/* this is index root page */
 							mach_write_to_4(page + FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
 											+ FSEG_HDR_SPACE, id);
@@ -3398,7 +3464,14 @@ skip_info:
 					if (fil_page_get_type(page) == FIL_PAGE_INDEX) {
 						dulint tmp = mach_read_from_8(page + (PAGE_HEADER + PAGE_INDEX_ID));
 
-						if (mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL) == 0
+						for (i = 0; i < n_index; i++) {
+							if (ut_dulint_cmp(old_id[i], tmp) == 0) {
+								mach_write_to_8(page + (PAGE_HEADER + PAGE_INDEX_ID), new_id[i]);
+								break;
+							}
+						}
+
+						if (!zip_size && mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL) == 0
 						    && ut_dulint_cmp(old_id[0], tmp) == 0) {
 							/* leaf page of cluster index, reset trx_id of records */
 							rec_t*	rec;
@@ -3417,7 +3490,7 @@ skip_info:
 										ULINT_UNDEFINED, &heap);
 								n_fields = rec_offs_n_fields(offsets);
 								if (!offset) {
-									offset = row_get_trx_id_offset(rec, index, offsets);
+									offset = row_get_trx_id_offset(index, offsets);
 								}
 								trx_write_trx_id(rec + offset, ut_dulint_create(0, 1));
 
@@ -3437,44 +3510,34 @@ skip_info:
 								rec = page_rec_get_next(rec);
 								n_recs--;
 							}
-						}
-
-						for (i = 0; i < n_index; i++) {
-							if (ut_dulint_cmp(old_id[i], tmp) == 0) {
-								mach_write_to_8(page + (PAGE_HEADER + PAGE_INDEX_ID), new_id[i]);
-								break;
-							}
+						} else if (mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL) == 0
+							   && ut_dulint_cmp(old_id[0], tmp) != 0) {
+							mach_write_to_8(page + (PAGE_HEADER + PAGE_MAX_TRX_ID), ut_dulint_create(0, 1));
 						}
 					}
 
 					if (mach_read_ull(page + FIL_PAGE_LSN) > current_lsn) {
 						mach_write_ull(page + FIL_PAGE_LSN, current_lsn);
-						mach_write_ull(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
-										current_lsn);
+						if (!zip_size) {
+							mach_write_ull(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
+									current_lsn);
+						}
 					}
 
-					mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
-							srv_use_checksums
-							? (!srv_fast_checksum
-							   ? buf_calc_page_new_checksum(page)
-							   : buf_calc_page_new_checksum_32(page))
-									: BUF_NO_CHECKSUM_MAGIC);
-					mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
-							srv_use_checksums
-							? buf_calc_page_old_checksum(page)
-									: BUF_NO_CHECKSUM_MAGIC);
+					fil_page_buf_page_store_checksum(page, zip_size);
 
 					success = os_file_write(filepath, file, page,
 								(ulint)(offset & 0xFFFFFFFFUL),
-								(ulint)(offset >> 32), UNIV_PAGE_SIZE);
+								(ulint)(offset >> 32),
+								zip_size ? zip_size : UNIV_PAGE_SIZE);
 				}
 
 skip_write:
 				if (free_limit_bytes
-				    && ((ib_int64_t)((offset + UNIV_PAGE_SIZE) * 100) / free_limit_bytes)
+				    && ((ib_int64_t)((offset + (zip_size ? zip_size : UNIV_PAGE_SIZE)) * 100) / free_limit_bytes)
 					!= ((offset * 100) / free_limit_bytes)) {
 					fprintf(stderr, " %lu",
-						(ulong)((ib_int64_t)((offset + UNIV_PAGE_SIZE) * 100) / free_limit_bytes));
+						(ulong)((ib_int64_t)((offset + (zip_size ? zip_size : UNIV_PAGE_SIZE)) * 100) / free_limit_bytes));
 				}
 			}
 
@@ -3530,13 +3593,6 @@ skip_write:
 			if (UNIV_LIKELY_NULL(heap)) {
 				mem_heap_free(heap);
 			}
-		} else {
-			/* zip page? */
-			size = (ulint)
-			(size_bytes
-					/ dict_table_flags_to_zip_size(flags));
-			fprintf(stderr, "InnoDB: import: table %s seems to be in newer format."
-					" It may not be able to treated for now.\n", name);
 		}
 		/* .exp file should be removed */
 		success = os_file_delete(info_file_path);
@@ -3618,6 +3674,269 @@ func_exit:
 	os_file_close(file);
 	mem_free(filepath);
 
+	if (srv_expand_import && dict_table_flags_to_zip_size(flags)) {
+		ulint		page_no;
+		ulint		zip_size;
+		ulint		height;
+		rec_t*		node_ptr;
+		dict_table_t*	table;
+		dict_index_t*	index;
+		buf_block_t*	block;
+		page_t*		page;
+		page_zip_des_t*	page_zip;
+		mtr_t		mtr;
+
+		mem_heap_t*	heap		= NULL;
+		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
+		ulint*		offsets		= offsets_;
+
+		rec_offs_init(offsets_);
+
+		zip_size = dict_table_flags_to_zip_size(flags);
+
+		table = dict_table_get_low(name);
+		index = dict_table_get_first_index(table);
+		page_no = dict_index_get_page(index);
+		ut_a(page_no == 3);
+
+		fprintf(stderr, "InnoDB: It is compressed .ibd file. need to convert additionaly on buffer pool.\n");
+
+		/* down to leaf */
+		mtr_start(&mtr);
+		mtr_set_log_mode(&mtr, MTR_LOG_NONE);
+
+		height = ULINT_UNDEFINED;
+
+		for (;;) {
+			block = buf_page_get(space_id, zip_size, page_no,
+					     RW_NO_LATCH, &mtr);
+			page = buf_block_get_frame(block);
+
+			block->check_index_page_at_flush = TRUE;
+
+			if (height == ULINT_UNDEFINED) {
+				height = btr_page_get_level(page, &mtr);
+			}
+
+			if (height == 0) {
+				break;
+			}
+
+			node_ptr = page_rec_get_next(page_get_infimum_rec(page));
+
+			height--;
+
+			offsets = rec_get_offsets(node_ptr, index, offsets, ULINT_UNDEFINED, &heap);
+			page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
+		}
+
+		mtr_commit(&mtr);
+
+		fprintf(stderr, "InnoDB: pages needs split are ...");
+
+		/* scan leaf pages */
+		while (page_no != FIL_NULL) {
+			rec_t*	rec;
+			rec_t*	supremum;
+			ulint	n_recs;
+
+			mtr_start(&mtr);
+
+			block = buf_page_get(space_id, zip_size, page_no,
+					     RW_X_LATCH, &mtr);
+			page = buf_block_get_frame(block);
+			page_zip = buf_block_get_page_zip(block);
+
+			if (!page_zip) {
+				/*something wrong*/
+				fprintf(stderr, "InnoDB: Something wrong with reading page %lu.\n", page_no);
+convert_err_exit:
+				mtr_commit(&mtr);
+				mutex_enter(&fil_system->mutex);
+				fil_space_free(space_id, FALSE);
+				mutex_exit(&fil_system->mutex);
+				success = FALSE;
+				goto convert_exit;
+			}
+
+			supremum = page_get_supremum_rec(page);
+			rec = page_rec_get_next(page_get_infimum_rec(page));
+			n_recs = page_get_n_recs(page);
+
+			/* this is an illegal operation for an online InnoDB system, so it is not logged */
+			while (rec && rec != supremum && n_recs > 0) {
+				ulint	n_fields;
+				ulint	i;
+				ulint	offset = index->trx_id_offset;
+
+				offsets = rec_get_offsets(rec, index, offsets,
+						ULINT_UNDEFINED, &heap);
+				n_fields = rec_offs_n_fields(offsets);
+				if (!offset) {
+					offset = row_get_trx_id_offset(index, offsets);
+				}
+				trx_write_trx_id(rec + offset, ut_dulint_create(0, 1));
+
+				for (i = 0; i < n_fields; i++) {
+					if (rec_offs_nth_extern(offsets, i)) {
+						ulint	local_len;
+						byte*	data;
+
+						data = rec_get_nth_field(rec, offsets, i, &local_len);
+
+						local_len -= BTR_EXTERN_FIELD_REF_SIZE;
+
+						mach_write_to_4(data + local_len + BTR_EXTERN_SPACE_ID, id);
+					}
+				}
+
+				rec = page_rec_get_next(rec);
+				n_recs--;
+			}
+
+			/* dummy logged update for along with modified page path */
+			if (ut_dulint_cmp(index->id, btr_page_get_index_id(page)) != 0) {
+				/* this should be adjusted already */
+				fprintf(stderr, "InnoDB: The page %lu seems to be converted wrong.\n", page_no);
+				goto convert_err_exit;
+			}
+			btr_page_set_index_id(page, page_zip, index->id, &mtr);
+
+			/* confirm whether fits to the page size or not */
+			if (!page_zip_compress(page_zip, page, index, &mtr)
+			    && !btr_page_reorganize(block, index, &mtr)) {
+				buf_block_t*	new_block;
+				page_t*		new_page;
+				page_zip_des_t*	new_page_zip;
+				rec_t*		split_rec;
+				ulint		n_uniq;
+
+				/* split page is needed */
+				fprintf(stderr, " %lu", page_no);
+
+				mtr_x_lock(dict_index_get_lock(index), &mtr);
+
+				n_uniq = dict_index_get_n_unique_in_tree(index);
+
+				if(page_get_n_recs(page) < 2) {
+					/* no way to make smaller */
+					fprintf(stderr, "InnoDB: The page %lu cannot be store to the page size.\n", page_no);
+					goto convert_err_exit;
+				}
+
+				if (UNIV_UNLIKELY(page_no == dict_index_get_page(index))) {
+					ulint		new_page_no;
+					dtuple_t*	node_ptr;
+					ulint		level;
+					rec_t*		node_ptr_rec;
+					page_cur_t	page_cursor;
+
+					/* it is root page, need to raise before split */
+
+					level = btr_page_get_level(page, &mtr);
+
+					new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, &mtr);
+					new_page = buf_block_get_frame(new_block);
+					new_page_zip = buf_block_get_page_zip(new_block);
+					btr_page_create(new_block, new_page_zip, index, level, &mtr);
+
+					btr_page_set_next(new_page, new_page_zip, FIL_NULL, &mtr);
+					btr_page_set_prev(new_page, new_page_zip, FIL_NULL, &mtr);
+
+					page_zip_copy_recs(new_page_zip, new_page,
+							   page_zip, page, index, &mtr);
+					btr_search_move_or_delete_hash_entries(new_block, block, index);
+
+					rec = page_rec_get_next(page_get_infimum_rec(new_page));
+					new_page_no = buf_block_get_page_no(new_block);
+
+					node_ptr = dict_index_build_node_ptr(index, rec, new_page_no, heap,
+									     level);
+					dtuple_set_info_bits(node_ptr,
+							     dtuple_get_info_bits(node_ptr)
+							     | REC_INFO_MIN_REC_FLAG);
+					btr_page_empty(block, page_zip, index, level + 1, &mtr);
+
+					btr_page_set_next(page, page_zip, FIL_NULL, &mtr);
+					btr_page_set_prev(page, page_zip, FIL_NULL, &mtr);
+
+					page_cur_set_before_first(block, &page_cursor);
+
+					node_ptr_rec = page_cur_tuple_insert(&page_cursor, node_ptr,
+									     index, 0, &mtr);
+					ut_a(node_ptr_rec);
+
+					if (!btr_page_reorganize(block, index, &mtr)) {
+						fprintf(stderr, "InnoDB: failed to store the page %lu.\n", page_no);
+						goto convert_err_exit;
+					}
+
+					/* move to the raised page */
+					page_no = new_page_no;
+					block = new_block;
+					page = new_page;
+					page_zip = new_page_zip;
+
+					fprintf(stderr, "(raise_to:%lu)", page_no);
+				}
+
+				split_rec = page_get_middle_rec(page);
+
+				new_block = btr_page_alloc(index, page_no + 1, FSP_UP,
+							   btr_page_get_level(page, &mtr), &mtr);
+				new_page = buf_block_get_frame(new_block);
+				new_page_zip = buf_block_get_page_zip(new_block);
+				btr_page_create(new_block, new_page_zip, index,
+						btr_page_get_level(page, &mtr), &mtr);
+
+				offsets = rec_get_offsets(split_rec, index, offsets, n_uniq, &heap);
+
+				btr_attach_half_pages(index, block,
+						      split_rec, new_block, FSP_UP, &mtr);
+
+				page_zip_copy_recs(new_page_zip, new_page,
+						   page_zip, page, index, &mtr);
+				page_delete_rec_list_start(split_rec - page + new_page,
+							   new_block, index, &mtr);
+				btr_search_move_or_delete_hash_entries(new_block, block, index);
+				page_delete_rec_list_end(split_rec, block, index,
+							 ULINT_UNDEFINED, ULINT_UNDEFINED, &mtr);
+
+				fprintf(stderr, "(new:%lu)", buf_block_get_page_no(new_block));
+
+				/* Are they needed? */
+				if (!btr_page_reorganize(block, index, &mtr)) {
+					fprintf(stderr, "InnoDB: failed to store the page %lu.\n", page_no);
+					goto convert_err_exit;
+				}
+				if (!btr_page_reorganize(new_block, index, &mtr)) {
+					fprintf(stderr, "InnoDB: failed to store the page %lu.\n", buf_block_get_page_no(new_block));
+					goto convert_err_exit;
+				}
+			}
+
+			page_no = btr_page_get_next(page, &mtr);
+
+			mtr_commit(&mtr);
+
+			if (heap) {
+				mem_heap_empty(heap);
+			}
+		}
+
+		fprintf(stderr, "...done.\nInnoDB: waiting the flush batch of the additional conversion.\n");
+
+		/* wait until all of the not-logged changes have been flushed */
+		buf_flush_batch(BUF_FLUSH_LIST, ULINT_MAX, mtr.end_lsn + 1);
+		buf_flush_wait_batch_end(BUF_FLUSH_LIST);
+
+		fprintf(stderr, "InnoDB: done.\n");
+convert_exit:
+		if (UNIV_LIKELY_NULL(heap)) {
+			mem_heap_free(heap);
+		}
+	}
+
 	return(success);
 }
 #endif /* !UNIV_HOTBACKUP */
@@ -4466,7 +4785,7 @@ fil_extend_space_to_desired_size(
 	mutex_exit(&fil_system->mutex);
 	mutex_exit(&fil_system->file_extend_mutex);
 
-	fil_flush(space_id);
+	fil_flush(space_id, TRUE);
 
 	return(success);
 }
@@ -4835,7 +5154,7 @@ _fil_io(
 	    && ((buf_page_t*)message)->space_was_being_deleted) {
 
 		if (mode == OS_AIO_NORMAL) {
-			buf_page_io_complete(message, trx);
+			buf_page_io_complete(message);
 			return(DB_SUCCESS); /*fake*/
 		}
 		if (type == OS_FILE_READ) {
@@ -4946,14 +5265,14 @@ _fil_io(
 	ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
 	ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0);
 
-	if (srv_pass_corrupt_table && space->is_corrupt) {
+	if (srv_pass_corrupt_table == 1 && space->is_corrupt) {
 		/* should ignore i/o for the crashed space */
 		mutex_enter(&fil_system->mutex);
 		fil_node_complete_io(node, fil_system, type);
 		mutex_exit(&fil_system->mutex);
 		if (mode == OS_AIO_NORMAL) {
 			ut_a(space->purpose == FIL_TABLESPACE);
-			buf_page_io_complete(message, trx);
+			buf_page_io_complete(message);
 		}
 		if (type == OS_FILE_READ) {
 			return(DB_TABLESPACE_DELETED);
@@ -4961,7 +5280,19 @@ _fil_io(
 			return(DB_SUCCESS);
 		}
 	} else {
-		ut_a(!space->is_corrupt);
+		if (srv_pass_corrupt_table > 1 && space->is_corrupt) {
+			/* should ignore write i/o for the crashed space */
+			if (type == OS_FILE_WRITE) {
+				mutex_enter(&fil_system->mutex);
+				fil_node_complete_io(node, fil_system, type);
+				mutex_exit(&fil_system->mutex);
+				if (mode == OS_AIO_NORMAL) {
+					ut_a(space->purpose == FIL_TABLESPACE);
+					buf_page_io_complete(message);
+				}
+				return(DB_SUCCESS);
+			}
+		}
 #ifdef UNIV_HOTBACKUP
 	/* In ibbackup do normal i/o, not aio */
 	if (type == OS_FILE_READ) {
@@ -5122,7 +5453,7 @@ fil_aio_wait(
 		     || buf_page_get_state(message) != BUF_BLOCK_FILE_PAGE);
 
 		srv_set_io_thread_op_info(segment, "complete io for buf page");
-		buf_page_io_complete(message, NULL);
+		buf_page_io_complete(message);
 		return;
 	}
 
@@ -5146,7 +5477,7 @@ fil_aio_wait(
 
 	if (fil_node->space->purpose == FIL_TABLESPACE) {
 		srv_set_io_thread_op_info(segment, "complete io for buf page");
-		buf_page_io_complete(message, NULL);
+		buf_page_io_complete(message);
 	} else {
 		srv_set_io_thread_op_info(segment, "complete io for log");
 		log_io_complete(message);
@@ -5161,8 +5492,9 @@ UNIV_INTERN
 void
 fil_flush(
 /*======*/
-	ulint	space_id)	/*!< in: file space id (this can be a group of
+	ulint	space_id,	/*!< in: file space id (this can be a group of
 				log files or a tablespace of the database) */
+	ibool	metadata)
 {
 	fil_space_t*	space;
 	fil_node_t*	node;
@@ -5233,7 +5565,7 @@ retry:
 			/* fprintf(stderr, "Flushing to file %s\n",
 			node->name); */
 
-			os_file_flush(file);
+			os_file_flush(file, metadata);
 
 			mutex_enter(&fil_system->mutex);
 
@@ -5316,7 +5648,7 @@ fil_flush_file_spaces(
 	a non-existing space id. */
 	for (i = 0; i < n_space_ids; i++) {
 
-		fil_flush(space_ids[i]);
+		fil_flush(space_ids[i], TRUE);
 	}
 
 	mem_free(space_ids);
diff --git a/storage/xtradb/ha/hash0hash.c b/storage/xtradb/ha/hash0hash.c
index 0f4fc55d895..30c304dafcd 100644
--- a/storage/xtradb/ha/hash0hash.c
+++ b/storage/xtradb/ha/hash0hash.c
@@ -128,70 +128,6 @@ hash_create(
 }
 
 /*************************************************************//**
-*/
-UNIV_INTERN
-ulint
-hash_create_needed(
-/*===============*/
-	ulint	n)
-{
-	ulint	prime;
-	ulint	offset;
-
-	prime = ut_find_prime(n);
-
-	offset = (sizeof(hash_table_t) + 7) / 8;
-	offset *= 8;
-
-	return(offset + sizeof(hash_cell_t) * prime);
-}
-
-UNIV_INTERN
-void
-hash_create_init(
-/*=============*/
-	hash_table_t*	table,
-	ulint		n)
-{
-	ulint	prime;
-	ulint	offset;
-
-	prime = ut_find_prime(n);
-
-	offset = (sizeof(hash_table_t) + 7) / 8;
-	offset *= 8;
-
-	table->array = (hash_cell_t*)(((byte*)table) + offset);
-	table->n_cells = prime;
-# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-	table->adaptive = FALSE;
-# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-	table->n_mutexes = 0;
-	table->mutexes = NULL;
-	table->heaps = NULL;
-	table->heap = NULL;
-	ut_d(table->magic_n = HASH_TABLE_MAGIC_N);
-
-	/* Initialize the cell array */
-	hash_table_clear(table);
-}
-
-UNIV_INTERN
-void
-hash_create_reuse(
-/*==============*/
-	hash_table_t*	table)
-{
-	ulint	offset;
-
-	offset = (sizeof(hash_table_t) + 7) / 8;
-	offset *= 8;
-
-	table->array = (hash_cell_t*)(((byte*)table) + offset);
-	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
-}
-
-/*************************************************************//**
 Frees a hash table. */
 UNIV_INTERN
 void
diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc
index 783508658a3..972a4407eea 100644
--- a/storage/xtradb/handler/ha_innodb.cc
+++ b/storage/xtradb/handler/ha_innodb.cc
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2000, 2010, MySQL AB & Innobase Oy. All Rights Reserved.
+Copyright (c) 2000, 2011, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2008, 2009 Google Inc.
 Copyright (c) 2009, Percona Inc.
 
@@ -198,11 +198,14 @@ static my_bool	innobase_create_status_file		= FALSE;
 static my_bool	innobase_stats_on_metadata		= TRUE;
 static my_bool	innobase_use_sys_stats_table		= FALSE;
 static my_bool	innobase_buffer_pool_shm_checksum	= TRUE;
+static uint	innobase_buffer_pool_shm_key		= 0;
 
 static char*	internal_innobase_data_file_path	= NULL;
 
 static char*	innodb_version_str = (char*) INNODB_VERSION_STR;
 
+static my_bool	innobase_blocking_lru_restore		= FALSE;
+
 /** Possible values for system variable "innodb_stats_method". The values
 are defined the same as its corresponding MyISAM system variable
 "myisam_stats_method"(see "myisam_stats_method_names"), for better usability */
@@ -367,6 +370,12 @@ static MYSQL_THDVAR_ULONG(flush_log_at_trx_commit_session, PLUGIN_VAR_RQCMDARG,
   "The value 3 regards innodb_flush_log_at_trx_commit (default).",
   NULL, NULL, 3, 0, 3, 0);
 
+static MYSQL_THDVAR_BOOL(fake_changes, PLUGIN_VAR_OPCMDARG,
+  "In the transaction after enabled, UPDATE, INSERT and DELETE only move the cursor to the records "
+  "and do nothing other operations (no changes, no ibuf, no undo, no transaction log) in the transaction. "
+  "This is to cause replication prefetch IO. ATTENTION: the transaction started after enabled is affected.",
+  NULL, NULL, FALSE);
+
 
 static handler *innobase_create_handler(handlerton *hton,
                                         TABLE_SHARE *table,
@@ -547,6 +556,8 @@ static SHOW_VAR innodb_status_variables[]= {
   (char*) &export_vars.innodb_buffer_pool_pages_misc,	  SHOW_LONG},
   {"buffer_pool_pages_total",
   (char*) &export_vars.innodb_buffer_pool_pages_total,	  SHOW_LONG},
+  {"buffer_pool_read_ahead_rnd",
+  (char*) &export_vars.innodb_buffer_pool_read_ahead_rnd, SHOW_LONG},
   {"buffer_pool_read_ahead",
   (char*) &export_vars.innodb_buffer_pool_read_ahead,	  SHOW_LONG},
   {"buffer_pool_read_ahead_evicted",
@@ -1432,6 +1443,8 @@ innobase_trx_init(
 	trx->check_unique_secondary = !thd_test_options(
 		thd, OPTION_RELAXED_UNIQUE_CHECKS);
 
+	trx->fake_changes = THDVAR(thd, fake_changes);
+
 #ifdef EXTENDED_SLOWLOG
 	if (thd_log_slow_verbosity(thd) & SLOG_V_INNODB) {
 		trx->take_stats = TRUE;
@@ -2504,6 +2517,12 @@ innobase_change_buffering_inited_ok:
 
 	srv_buf_pool_size = (ulint) innobase_buffer_pool_size;
 
+	if (innobase_buffer_pool_shm_key) {
+		fprintf(stderr,
+			"InnoDB: Warning: innodb_buffer_pool_shm_key is deprecated function.\n"
+			"InnoDB:          innodb_buffer_pool_shm_key was ignored.\n");
+	}
+
 	srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
 
 	srv_n_file_io_threads = (ulint) innobase_file_io_threads;
@@ -2520,7 +2539,8 @@ innobase_change_buffering_inited_ok:
 	srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
 	srv_use_checksums = (ibool) innobase_use_checksums;
 	srv_fast_checksum = (ibool) innobase_fast_checksum;
-	srv_buffer_pool_shm_checksum = (ibool) innobase_buffer_pool_shm_checksum;
+
+	srv_blocking_lru_restore = (ibool) innobase_blocking_lru_restore;
 
 #ifdef HAVE_LARGE_PAGES
         if ((os_use_large_pages = (ibool) my_use_large_pages))
@@ -2558,6 +2578,10 @@ innobase_change_buffering_inited_ok:
 
 	innobase_commit_concurrency_init_default();
 
+#ifndef EXTENDED_FOR_KILLIDLE
+	srv_kill_idle_transaction = 0;
+#endif
+
 	/* Since we in this module access directly the fields of a trx
 	struct, and due to different headers and flags it might happen that
 	mutex_t has a different size in this module and in InnoDB
@@ -2756,7 +2780,7 @@ innobase_commit_low(
 #ifdef MYSQL_SERVER
 	THD *thd=current_thd;
 
-	if (thd && thd->slave_thread) {
+	if (thd && thd_is_replication_slave_thread(thd)) {
 		/* Update the replication position info inside InnoDB.
 		   In embedded server, does nothing. */
 		const char *log_file_name, *group_relay_log_name;
@@ -2958,6 +2982,11 @@ innobase_commit(
 		trx_search_latch_release_if_reserved(trx);
 	}
 
+	if (trx->fake_changes && (all || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))) {
+		innobase_rollback(hton, thd, all); /* rollback implicitly */
+		thd->main_da.reset_diagnostics_area(); /* because debug assertion code complains, if something left */
+		DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+	}
 	/* The flag TRX_ACTIVE_IN_MYSQL in trx->active_flag is set in
 
 	1. ::external_lock(),
@@ -3903,7 +3932,7 @@ ha_innobase::open(
 		DBUG_RETURN(1);
 	}
 
-	if (share->ib_table && share->ib_table->is_corrupt) {
+	if (srv_pass_corrupt_table <= 1 && share->ib_table && share->ib_table->is_corrupt) {
 		free_share(share);
 
 		DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
@@ -3936,18 +3965,18 @@ retry:
 	/* Get pointer to a table object in InnoDB dictionary cache */
 	ib_table = dict_table_get(norm_name, TRUE);
 	
-	if (ib_table && ib_table->is_corrupt) {
+	if (srv_pass_corrupt_table <= 1 && ib_table && ib_table->is_corrupt) {
 		free_share(share);
 		my_free(upd_buff, MYF(0));
 
 		DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
 	}
 
-	if (share->ib_table) {
-		ut_a(share->ib_table == ib_table);
-	} else {
-		share->ib_table = ib_table;
-	}
+	share->ib_table = ib_table;
+
+
+
+
 
 	if (NULL == ib_table) {
 		if (is_part && retries < 10) {
@@ -4244,25 +4273,6 @@ field_in_record_is_null(
 	return(0);
 }
 
-/**************************************************************//**
-Sets a field in a record to SQL NULL. Uses the record format
-information in table to track the null bit in record. */
-static inline
-void
-set_field_in_record_to_null(
-/*========================*/
-	TABLE*	table,	/*!< in: MySQL table object */
-	Field*	field,	/*!< in: MySQL field object */
-	char*	record)	/*!< in: a row in MySQL format */
-{
-	int	null_offset;
-
-	null_offset = (uint) ((char*) field->null_ptr
-					- (char*) table->record[0]);
-
-	record[null_offset] = record[null_offset] | field->null_bit;
-}
-
 /*************************************************************//**
 InnoDB uses this function to compare two data fields for which the data type
 is such that we must use MySQL code to compare them. NOTE that the prototype
@@ -5672,14 +5682,15 @@ calc_row_difference(
 			/* The field has changed */
 
 			ufield = uvect->fields + n_changed;
+			UNIV_MEM_INVALID(ufield, sizeof *ufield);
 
 			/* Let us use a dummy dfield to make the conversion
 			from the MySQL column format to the InnoDB format */
 
-			dict_col_copy_type(prebuilt->table->cols + innodb_idx,
-					   dfield_get_type(&dfield));
-
 			if (n_len != UNIV_SQL_NULL) {
+				dict_col_copy_type(prebuilt->table->cols + innodb_idx,
+						   dfield_get_type(&dfield));
+
 				buf = row_mysql_store_col_in_innobase_format(
 					&dfield,
 					(byte*)buf,
@@ -5687,7 +5698,7 @@ calc_row_difference(
 					new_mysql_row_col,
 					col_pack_len,
 					dict_table_is_comp(prebuilt->table));
-				dfield_copy_data(&ufield->new_val, &dfield);
+				dfield_copy(&ufield->new_val, &dfield);
 			} else {
 				dfield_set_null(&ufield->new_val);
 			}
@@ -6128,7 +6139,7 @@ ha_innobase::index_read(
 
 	ha_statistic_increment(&SSV::ha_read_key_count);
 
-	if (share->ib_table->is_corrupt) {
+	if (srv_pass_corrupt_table <= 1 && share->ib_table->is_corrupt) {
 		DBUG_RETURN(HA_ERR_CRASHED);
 	}
 
@@ -6197,7 +6208,7 @@ ha_innobase::index_read(
 		ret = DB_UNSUPPORTED;
 	}
 
-	if (share->ib_table->is_corrupt) {
+	if (srv_pass_corrupt_table <= 1 && share->ib_table->is_corrupt) {
 		DBUG_RETURN(HA_ERR_CRASHED);
 	}
 
@@ -6316,7 +6327,7 @@ ha_innobase::change_active_index(
 {
 	DBUG_ENTER("change_active_index");
 
-	if (share->ib_table->is_corrupt) {
+	if (srv_pass_corrupt_table <= 1 && share->ib_table->is_corrupt) {
 		DBUG_RETURN(HA_ERR_CRASHED);
 	}
 
@@ -6411,7 +6422,7 @@ ha_innobase::general_fetch(
 
 	DBUG_ENTER("general_fetch");
 
-	if (share->ib_table->is_corrupt) {
+	if (srv_pass_corrupt_table <= 1 && share->ib_table->is_corrupt) {
 		DBUG_RETURN(HA_ERR_CRASHED);
 	}
 
@@ -6424,7 +6435,7 @@ ha_innobase::general_fetch(
 
 	innodb_srv_conc_exit_innodb(prebuilt->trx);
 
-	if (share->ib_table->is_corrupt) {
+	if (srv_pass_corrupt_table <= 1 && share->ib_table->is_corrupt) {
 		DBUG_RETURN(HA_ERR_CRASHED);
 	}
 
@@ -6765,10 +6776,6 @@ create_table_def(
 	DBUG_PRINT("enter", ("table_name: %s", table_name));
 
 	ut_a(trx->mysql_thd != NULL);
-	if (IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(table_name,
-						  (THD*) trx->mysql_thd)) {
-		DBUG_RETURN(HA_ERR_GENERIC);
-	}
 
 	/* MySQL does the name length check. But we do additional check
 	on the name length here */
@@ -6890,6 +6897,8 @@ err_col:
 			col_len);
 	}
 
+	srv_lower_case_table_names = lower_case_table_names;
+
 	error = row_create_table_for_mysql(table, trx);
 
 	if (error == DB_DUPLICATE_KEY) {
@@ -7306,42 +7315,17 @@ ha_innobase::create(
 		DBUG_RETURN(HA_ERR_TO_BIG_ROW);
 	}
 
-	/* Get the transaction associated with the current thd, or create one
-	if not yet created */
-
-	parent_trx = check_trx_exists(thd);
-
-	/* In case MySQL calls this in the middle of a SELECT query, release
-	possible adaptive hash latch to avoid deadlocks of threads */
-
-	trx_search_latch_release_if_reserved(parent_trx);
-
-	trx = innobase_trx_allocate(thd);
-
-	if (lower_case_table_names) {
-		srv_lower_case_table_names = TRUE;
-	} else {
-		srv_lower_case_table_names = FALSE;
-	}
-
 	strcpy(name2, name);
 
 	normalize_table_name(norm_name, name2);
 
-	/* Latch the InnoDB data dictionary exclusively so that no deadlocks
-	or lock waits can happen in it during a table create operation.
-	Drop table etc. do this latching in row0mysql.c. */
-
-	row_mysql_lock_data_dictionary(trx);
-
 	/* Create the table definition in InnoDB */
 
 	flags = 0;
 
 	/* Validate create options if innodb_strict_mode is set. */
 	if (!create_options_are_valid(thd, form, create_info)) {
-		error = ER_ILLEGAL_HA_CREATE_OPTION;
-		goto cleanup;
+		DBUG_RETURN(ER_ILLEGAL_HA_CREATE_OPTION);
 	}
 
 	if (create_info->key_block_size) {
@@ -7483,16 +7467,43 @@ ha_innobase::create(
 
 	/* Check for name conflicts (with reserved name) for
 	any user indices to be created. */
-	if (innobase_index_name_is_reserved(trx, form->key_info,
+	if (innobase_index_name_is_reserved(thd, form->key_info,
 					    form->s->keys)) {
-		error = -1;
-		goto cleanup;
+		DBUG_RETURN(-1);
+	}
+
+	if (IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(norm_name, thd)) {
+		DBUG_RETURN(HA_ERR_GENERIC);
 	}
 
 	if (create_info->options & HA_LEX_CREATE_TMP_TABLE) {
 		flags |= DICT_TF2_TEMPORARY << DICT_TF2_SHIFT;
 	}
 
+	/* Get the transaction associated with the current thd, or create one
+	if not yet created */
+
+	parent_trx = check_trx_exists(thd);
+
+	/* In case MySQL calls this in the middle of a SELECT query, release
+	possible adaptive hash latch to avoid deadlocks of threads */
+
+	trx_search_latch_release_if_reserved(parent_trx);
+
+	trx = innobase_trx_allocate(thd);
+
+	if (trx->fake_changes) {
+		innobase_commit_low(trx);
+		trx_free_for_mysql(trx);
+		DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+	}
+
+	/* Latch the InnoDB data dictionary exclusively so that no deadlocks
+	or lock waits can happen in it during a table create operation.
+	Drop table etc. do this latching in row0mysql.c. */
+
+	row_mysql_lock_data_dictionary(trx);
+
 	error = create_table_def(trx, form, norm_name,
 		create_info->options & HA_LEX_CREATE_TMP_TABLE ? name2 : NULL,
 		flags);
@@ -7692,6 +7703,10 @@ ha_innobase::delete_all_rows(void)
 		DBUG_RETURN(HA_ERR_CRASHED);
 	}
 
+	if (prebuilt->trx->fake_changes) {
+		goto fallback;
+	}
+
 	/* Truncate the table in InnoDB */
 
 	error = row_truncate_table_for_mysql(prebuilt->table, prebuilt->trx);
@@ -7752,10 +7767,10 @@ ha_innobase::delete_table(
 
 	trx = innobase_trx_allocate(thd);
 
-	if (lower_case_table_names) {
-		srv_lower_case_table_names = TRUE;
-	} else {
-		srv_lower_case_table_names = FALSE;
+	if (trx->fake_changes) {
+		innobase_commit_low(trx);
+		trx_free_for_mysql(trx);
+		DBUG_RETURN(HA_ERR_WRONG_COMMAND);
 	}
 
 	name_len = strlen(name);
@@ -7764,6 +7779,8 @@ ha_innobase::delete_table(
 
 	/* Drop the table in InnoDB */
 
+	srv_lower_case_table_names = lower_case_table_names;
+
 	error = row_drop_table_for_mysql(norm_name, trx,
 					 thd_sql_command(thd)
 					 == SQLCOM_DROP_DB);
@@ -7844,6 +7861,12 @@ innobase_drop_database(
 	trx->mysql_thd = NULL;
 #else
 	trx = innobase_trx_allocate(thd);
+	if (trx->fake_changes) {
+		my_free(namebuf, MYF(0));
+		innobase_commit_low(trx);
+		trx_free_for_mysql(trx);
+		return; /* ignore */
+	}
 #endif
 	row_drop_database_for_mysql(namebuf, trx);
 	my_free(namebuf, MYF(0));
@@ -7880,12 +7903,6 @@ innobase_rename_table(
 	char*	norm_from;
 	DBUG_ENTER("innobase_rename_table");
 
-	if (lower_case_table_names) {
-		srv_lower_case_table_names = TRUE;
-	} else {
-		srv_lower_case_table_names = FALSE;
-	}
-
 	// Magic number 64 arbitrary
 	norm_to = (char*) my_malloc(strlen(to) + 64, MYF(0));
 	norm_from = (char*) my_malloc(strlen(from) + 64, MYF(0));
@@ -7900,6 +7917,8 @@ innobase_rename_table(
 		row_mysql_lock_data_dictionary(trx);
 	}
 
+	srv_lower_case_table_names = lower_case_table_names;
+
 	error = row_rename_table_for_mysql(
 		norm_from, norm_to, trx, lock_and_commit);
 
@@ -7957,6 +7976,11 @@ ha_innobase::rename_table(
 	trx_search_latch_release_if_reserved(parent_trx);
 
 	trx = innobase_trx_allocate(thd);
+	if (trx->fake_changes) {
+		innobase_commit_low(trx);
+		trx_free_for_mysql(trx);
+		DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+	}
 
 	error = innobase_rename_table(trx, from, to, TRUE);
 
@@ -8460,6 +8484,8 @@ ha_innobase::info_low(
 
 	if (flag & HA_STATUS_VARIABLE) {
 
+		ulint	page_size;
+
 		dict_table_stats_lock(ib_table, RW_S_LATCH);
 
 		n_rows = ib_table->stat_n_rows;
@@ -8502,14 +8528,19 @@ ha_innobase::info_low(
 			prebuilt->autoinc_last_value = 0;
 		}
 
+		page_size = dict_table_zip_size(ib_table);
+		if (page_size == 0) {
+			page_size = UNIV_PAGE_SIZE;
+		}
+
 		stats.records = (ha_rows)n_rows;
 		stats.deleted = 0;
-		stats.data_file_length = ((ulonglong)
-				ib_table->stat_clustered_index_size)
-					* UNIV_PAGE_SIZE;
-		stats.index_file_length = ((ulonglong)
-				ib_table->stat_sum_of_other_index_sizes)
-					* UNIV_PAGE_SIZE;
+		stats.data_file_length
+			= ((ulonglong) ib_table->stat_clustered_index_size)
+			* page_size;
+		stats.index_file_length =
+			((ulonglong) ib_table->stat_sum_of_other_index_sizes)
+			* page_size;
 
 		dict_table_stats_unlock(ib_table, RW_S_LATCH);
 
@@ -9466,10 +9497,18 @@ ha_innobase::external_lock(
 
 	reset_template();
 
-	if (lock_type == F_WRLCK) {
+	if (lock_type == F_WRLCK
+	    || (table->s->tmp_table
+		&& thd_sql_command(thd) == SQLCOM_LOCK_TABLES)) {
 
 		/* If this is a SELECT, then it is in UPDATE TABLE ...
-		or SELECT ... FOR UPDATE */
+		or SELECT ... FOR UPDATE
+
+		For temporary tables which are locked for READ by LOCK TABLES,
+		updates are still allowed by the SQL layer. To accommodate
+		this situation, we always request an X-lock on such a table
+		at LOCK TABLES time.
+		*/
 		prebuilt->select_lock_type = LOCK_X;
 		prebuilt->stored_select_lock_type = LOCK_X;
 	}
@@ -10716,6 +10755,10 @@ innobase_xa_prepare(
 		return(0);
 	}
 
+	if (trx->fake_changes) {
+		return(0);
+	}
+
 	thd_get_xid(thd, (MYSQL_XID*) &trx->xid);
 
 	/* Release a possible FIFO ticket and search latch. Since we will
@@ -10804,7 +10847,7 @@ innobase_commit_by_xid(
 
 	if (trx) {
 		innobase_commit_low(trx);
-
+		trx_free_for_background(trx);
 		return(XA_OK);
 	} else {
 		return(XAER_NOTA);
@@ -10830,7 +10873,9 @@ innobase_rollback_by_xid(
 	trx = trx_get_trx_by_xid(xid);
 
 	if (trx) {
-		return(innobase_rollback_trx(trx));
+		int	ret = innobase_rollback_trx(trx);
+		trx_free_for_background(trx);
+		return(ret);
 	} else {
 		return(XAER_NOTA);
 	}
@@ -11058,7 +11103,7 @@ ha_innobase::check_if_incompatible_data(
 	if (info_row_type == ROW_TYPE_DEFAULT)
 		info_row_type = ROW_TYPE_COMPACT;
 	if ((info->used_fields & HA_CREATE_USED_ROW_FORMAT) &&
-	    get_row_type() != ((info->row_type == ROW_TYPE_DEFAULT)
+	    row_type != ((info->row_type == ROW_TYPE_DEFAULT)
 				? ROW_TYPE_COMPACT : info->row_type)) {
 
 		DBUG_PRINT("info", ("get_row_type()=%d != info->row_type=%d -> "
@@ -11522,19 +11567,19 @@ static int show_innodb_vars(THD *thd, SHOW_VAR *var, char *buff)
   return 0;
 }
 
-/***********************************************************************
+/*********************************************************************//**
 This function checks each index name for a table against reserved
-system default primary index name 'GEN_CLUST_INDEX'. If a name matches,
-this function pushes an warning message to the client, and returns true. */
+system default primary index name 'GEN_CLUST_INDEX'. If a name
+matches, this function pushes a warning message to the client,
+and returns true.
+@return true if the index name matches the reserved name */
 extern "C" UNIV_INTERN
 bool
 innobase_index_name_is_reserved(
 /*============================*/
-					/* out: true if an index name
-					matches the reserved name */
-	const trx_t*	trx,		/* in: InnoDB transaction handle */
-	const KEY*	key_info,	/* in: Indexes to be created */
-	ulint		num_of_keys)	/* in: Number of indexes to
+	THD*		thd,		/*!< in/out: MySQL connection */
+	const KEY*	key_info,	/*!< in: Indexes to be created */
+	ulint		num_of_keys)	/*!< in: Number of indexes to
 					be created. */
 {
 	const KEY*	key;
@@ -11546,7 +11591,7 @@ innobase_index_name_is_reserved(
 		if (innobase_strcasecmp(key->name,
 					innobase_index_reserve_name) == 0) {
 			/* Push warning to mysql */
-			push_warning_printf((THD*) trx->mysql_thd,
+			push_warning_printf(thd,
 					    MYSQL_ERROR::WARN_LEVEL_WARN,
 					    ER_WRONG_NAME_FOR_INDEX,
 					    "Cannot Create Index with name "
@@ -11565,6 +11610,48 @@ innobase_index_name_is_reserved(
 	return(false);
 }
 
+/***********************************************************************
+Helper functions for killing sessions with idle transactions */
+extern "C"
+ibool
+innobase_thd_is_idle(
+/*=================*/
+	const void*	thd)	/*!< in: thread handle (THD*) */
+{
+#ifdef EXTENDED_FOR_KILLIDLE
+	return(thd_command((const THD*) thd) == COM_SLEEP);
+#else
+	return(FALSE);
+#endif
+}
+
+extern "C"
+ib_int64_t
+innobase_thd_get_start_time(
+/*========================*/
+	const void*	thd)	/*!< in: thread handle (THD*) */
+{
+#ifdef EXTENDED_FOR_KILLIDLE
+	return((ib_int64_t)thd_start_time((const THD*) thd));
+#else
+	return(0); /*dummy value*/
+#endif
+}
+
+extern "C"
+void
+innobase_thd_kill(
+/*==============*/
+	void*	thd)
+{
+#ifdef EXTENDED_FOR_KILLIDLE
+	thd_kill((THD*) thd);
+#else
+	return;
+#endif
+}
+
+
 static SHOW_VAR innodb_status_variables_export[]= {
   {"Innodb",                   (char*) &show_innodb_vars, SHOW_FUNC},
   {NullS, NullS, SHOW_LONG}
@@ -11624,7 +11711,7 @@ static MYSQL_SYSVAR_BOOL(recovery_stats, innobase_recovery_stats,
 static MYSQL_SYSVAR_ULINT(use_purge_thread, srv_use_purge_thread,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
   "Number of purge devoted threads. #### over 1 is EXPERIMENTAL ####",
-  NULL, NULL, 1, 0, 64, 0);
+  NULL, NULL, 1, 0, UNIV_MAX_PARALLELISM, 0);
 
 static MYSQL_SYSVAR_BOOL(overwrite_relay_log_info, innobase_overwrite_relay_log_info,
   PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
@@ -11796,16 +11883,16 @@ static MYSQL_SYSVAR_ULONG(autoextend_increment, srv_auto_extend_increment,
 static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
   "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
-  NULL, NULL, 128*1024*1024L, 32*1024*1024L, LONGLONG_MAX, 1024*1024L);
+  NULL, NULL, 128*1024*1024L, 5*1024*1024L, LONGLONG_MAX, 1024*1024L);
 
-static MYSQL_SYSVAR_UINT(buffer_pool_shm_key, srv_buffer_pool_shm_key,
+static MYSQL_SYSVAR_UINT(buffer_pool_shm_key, innobase_buffer_pool_shm_key,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
-  "[experimental] The key value of shared memory segment for the buffer pool. 0 (default) disables the feature.",
+  "[Deprecated option] no effect",
   NULL, NULL, 0, 0, INT_MAX32, 0);
 
 static MYSQL_SYSVAR_BOOL(buffer_pool_shm_checksum, innobase_buffer_pool_shm_checksum,
   PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
-  "Enable buffer_pool_shm checksum validation (enabled by default).",
+  "[Deprecated option] no effect",
   NULL, NULL, TRUE);
 
 static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency,
@@ -11818,6 +11905,14 @@ static MYSQL_SYSVAR_ULONG(concurrency_tickets, srv_n_free_tickets_to_enter,
   "Number of times a thread is allowed to enter InnoDB within the same SQL query after it has once got the ticket",
   NULL, NULL, 500L, 1L, ~0L, 0);
 
+#ifdef EXTENDED_FOR_KILLIDLE
+#define kill_idle_help_text "If non-zero value, the idle session with transaction which is idle over the value in seconds is killed by InnoDB."
+#else
+#define kill_idle_help_text "No effect for this build."
+#endif
+static MYSQL_SYSVAR_LONGLONG(kill_idle_transaction, srv_kill_idle_transaction,
+  PLUGIN_VAR_RQCMDARG, kill_idle_help_text, NULL, NULL, 0, 0, LONG_MAX, 0);
+
 static MYSQL_SYSVAR_LONG(file_io_threads, innobase_file_io_threads,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY | PLUGIN_VAR_NOSYSVAR,
   "Number of file I/O threads in InnoDB.",
@@ -11953,6 +12048,11 @@ static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug,
   NULL, NULL, 0, 0, 1, 0);
 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
 
+static MYSQL_SYSVAR_BOOL(random_read_ahead, srv_random_read_ahead,
+  PLUGIN_VAR_NOCMDARG,
+  "Whether to use read ahead for random access within an extent.",
+  NULL, NULL, FALSE);
+
 static MYSQL_SYSVAR_ULONG(read_ahead_threshold, srv_read_ahead_threshold,
   PLUGIN_VAR_RQCMDARG,
   "Number of pages that must be accessed sequentially for InnoDB to "
@@ -12077,13 +12177,19 @@ static MYSQL_SYSVAR_UINT(auto_lru_dump, srv_auto_lru_dump,
   "0 (the default) disables automatic dumps.",
   NULL, NULL, 0, 0, UINT_MAX32, 0);
 
+static MYSQL_SYSVAR_BOOL(blocking_lru_restore, innobase_blocking_lru_restore,
+  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+  "Block XtraDB startup process until buffer pool is full restored from a "
+  "dump file (if present). Disabled by default.",
+  NULL, NULL, FALSE);
+
 static	MYSQL_SYSVAR_ULINT(pass_corrupt_table, srv_pass_corrupt_table,
   PLUGIN_VAR_RQCMDARG,
   "Pass corruptions of user tables as 'corrupt table' instead of not crashing itself, "
   "when used with file_per_table. "
   "All file io for the datafile after detected as corrupt are disabled, "
   "except for the deletion.",
-  NULL, NULL, 0, 0, 1, 0);
+  NULL, NULL, 0, 0, 2, 0);
 
 static MYSQL_SYSVAR_ULINT(lazy_drop_table, srv_lazy_drop_table,
   PLUGIN_VAR_RQCMDARG,
@@ -12103,6 +12209,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
   MYSQL_SYSVAR(fast_checksum),
   MYSQL_SYSVAR(commit_concurrency),
   MYSQL_SYSVAR(concurrency_tickets),
+  MYSQL_SYSVAR(kill_idle_transaction),
   MYSQL_SYSVAR(data_file_path),
   MYSQL_SYSVAR(doublewrite_file),
   MYSQL_SYSVAR(data_home_dir),
@@ -12177,12 +12284,15 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
 #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
   MYSQL_SYSVAR(change_buffering_debug),
 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+  MYSQL_SYSVAR(random_read_ahead),
   MYSQL_SYSVAR(read_ahead_threshold),
   MYSQL_SYSVAR(io_capacity),
   MYSQL_SYSVAR(auto_lru_dump),
+  MYSQL_SYSVAR(blocking_lru_restore),
   MYSQL_SYSVAR(use_purge_thread),
   MYSQL_SYSVAR(pass_corrupt_table),
   MYSQL_SYSVAR(lazy_drop_table),
+  MYSQL_SYSVAR(fake_changes),
   NULL
 };
 
diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h
index 7b263db2537..749438e0c89 100644
--- a/storage/xtradb/handler/ha_innodb.h
+++ b/storage/xtradb/handler/ha_innodb.h
@@ -390,15 +390,14 @@ innobase_trx_allocate(
 This function checks each index name for a table against reserved
 system default primary index name 'GEN_CLUST_INDEX'. If a name
 matches, this function pushes an warning message to the client,
-and returns true. */
+and returns true.
+@return true if the index name matches the reserved name */
 extern "C"
 bool
 innobase_index_name_is_reserved(
 /*============================*/
-					/* out: true if the index name
-					matches the reserved name */
-	const trx_t*	trx,		/* in: InnoDB transaction handle */
-	const KEY*	key_info,	/* in: Indexes to be created */
-	ulint		num_of_keys);	/* in: Number of indexes to
+	THD*		thd,		/*!< in/out: MySQL connection */
+	const KEY*	key_info,	/*!< in: Indexes to be created */
+	ulint		num_of_keys);	/*!< in: Number of indexes to
 					be created. */
 
diff --git a/storage/xtradb/handler/handler0alter.cc b/storage/xtradb/handler/handler0alter.cc
index c54b4a17fd2..37fddf71cbc 100644
--- a/storage/xtradb/handler/handler0alter.cc
+++ b/storage/xtradb/handler/handler0alter.cc
@@ -649,44 +649,47 @@ ha_innobase::add_index(
 
 	update_thd();
 
-	heap = mem_heap_create(1024);
-
 	/* In case MySQL calls this in the middle of a SELECT query, release
 	possible adaptive hash latch to avoid deadlocks of threads. */
 	trx_search_latch_release_if_reserved(prebuilt->trx);
-	trx_start_if_not_started(prebuilt->trx);
+	if (prebuilt->trx->fake_changes) {
+		DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+	}
 
-	/* Create a background transaction for the operations on
-	the data dictionary tables. */
-	trx = innobase_trx_allocate(user_thd);
-	trx_start_if_not_started(trx);
+	/* Check if the index name is reserved. */
+	if (innobase_index_name_is_reserved(user_thd, key_info, num_of_keys)) {
+		DBUG_RETURN(-1);
+	}
 
 	innodb_table = indexed_table
 		= dict_table_get(prebuilt->table->name, FALSE);
 
 	if (UNIV_UNLIKELY(!innodb_table)) {
-		error = HA_ERR_NO_SUCH_TABLE;
-		goto err_exit;
+		DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
 	}
 
-	/* Check if the index name is reserved. */
-	if (innobase_index_name_is_reserved(trx, key_info, num_of_keys)) {
-                error = ER_WRONG_NAME_FOR_INDEX;
-	} else {
-		/* Check that index keys are sensible */
-		error = innobase_check_index_keys(key_info, num_of_keys,
-						  innodb_table);
-	}
+	/* Check that index keys are sensible */
+	error = innobase_check_index_keys(key_info, num_of_keys, innodb_table);
 
 	if (UNIV_UNLIKELY(error)) {
-err_exit:
+		DBUG_RETURN(error);
+	}
+
+	heap = mem_heap_create(1024);
+	trx_start_if_not_started(prebuilt->trx);
+
+	/* Create a background transaction for the operations on
+	the data dictionary tables. */
+	trx = innobase_trx_allocate(user_thd);
+	if (trx->fake_changes) {
 		mem_heap_free(heap);
 		trx_general_rollback_for_mysql(trx, NULL);
 		trx_free_for_mysql(trx);
-		trx_commit_for_mysql(prebuilt->trx);
-		DBUG_RETURN(error);
+		DBUG_RETURN(HA_ERR_WRONG_COMMAND);
 	}
 
+	trx_start_if_not_started(trx);
+
 	/* Create table containing all indexes to be built in this
 	alter table add index so that they are in the correct order
 	in the table. */
@@ -758,8 +761,12 @@ err_exit:
 
 			ut_d(dict_table_check_for_dup_indexes(innodb_table,
 							      FALSE));
+			mem_heap_free(heap);
+			trx_general_rollback_for_mysql(trx, NULL);
 			row_mysql_unlock_data_dictionary(trx);
-			goto err_exit;
+			trx_free_for_mysql(trx);
+			trx_commit_for_mysql(prebuilt->trx);
+			DBUG_RETURN(error);
 		}
 
 		trx->table_id = indexed_table->id;
@@ -782,10 +789,6 @@ err_exit:
 
 	ut_ad(error == DB_SUCCESS);
 
-	/* We will need to rebuild index translation table. Set
-	valid index entry count in the translation table to zero */
-	share->idx_trans_tbl.index_count = 0;
-
 	/* Commit the data dictionary transaction in order to release
 	the table locks on the system tables.  This means that if
 	MySQL crashes while creating a new primary key inside
@@ -911,6 +914,14 @@ error:
 		}
 
 convert_error:
+		if (error == DB_SUCCESS) {
+			/* The index build succeeded. We will need to
+			rebuild the index translation table.  Reset the
+			index entry count in the translation table
+			to zero, so that the translation table is rebuilt */
+			share->idx_trans_tbl.index_count = 0;
+		}
+
 		error = convert_error_code_to_mysql(error,
 						    innodb_table->flags,
 						    user_thd);
@@ -963,6 +974,10 @@ ha_innobase::prepare_drop_index(
 	trx_search_latch_release_if_reserved(prebuilt->trx);
 	trx = prebuilt->trx;
 
+	if (trx->fake_changes) {
+		DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+	}
+
 	/* Test and mark all the indexes to be dropped */
 
 	row_mysql_lock_data_dictionary(trx);
@@ -1167,6 +1182,12 @@ ha_innobase::final_drop_index(
 	/* Create a background transaction for the operations on
 	the data dictionary tables. */
 	trx = innobase_trx_allocate(user_thd);
+	if (trx->fake_changes) {
+		trx_general_rollback_for_mysql(trx, NULL);
+		trx_free_for_mysql(trx);
+		DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+	}
+
 	trx_start_if_not_started(trx);
 
 	/* Flag this transaction as a dictionary operation, so that
diff --git a/storage/xtradb/handler/i_s.cc b/storage/xtradb/handler/i_s.cc
index d989ce87aa3..15c09d69830 100644
--- a/storage/xtradb/handler/i_s.cc
+++ b/storage/xtradb/handler/i_s.cc
@@ -1006,7 +1006,7 @@ UNIV_INTERN struct st_mysql_plugin	i_s_innodb_buffer_pool_pages =
 
 	/* plugin author (for SHOW PLUGINS) */
 	/* const char* */
-	STRUCT_FLD(author, plugin_author),
+	STRUCT_FLD(author, "Percona"),
 
 	/* general descriptive text (for SHOW PLUGINS) */
 	/* const char* */
@@ -1055,7 +1055,7 @@ UNIV_INTERN struct st_maria_plugin      i_s_innodb_buffer_pool_pages_maria =
 
         /* plugin author (for SHOW PLUGINS) */
         /* const char* */
-        STRUCT_FLD(author, plugin_author),
+        STRUCT_FLD(author, "Percona"),
 
         /* general descriptive text (for SHOW PLUGINS) */
         /* const char* */
@@ -1108,7 +1108,7 @@ UNIV_INTERN struct st_mysql_plugin	i_s_innodb_buffer_pool_pages_index =
 
 	/* plugin author (for SHOW PLUGINS) */
 	/* const char* */
-	STRUCT_FLD(author, plugin_author),
+	STRUCT_FLD(author, "Percona"),
 
 	/* general descriptive text (for SHOW PLUGINS) */
 	/* const char* */
@@ -1157,7 +1157,7 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_buffer_pool_pages_index_maria =
 
         /* plugin author (for SHOW PLUGINS) */
         /* const char* */
-        STRUCT_FLD(author, plugin_author),
+        STRUCT_FLD(author, "Percona"),
 
         /* general descriptive text (for SHOW PLUGINS) */
         /* const char* */
@@ -1210,7 +1210,7 @@ UNIV_INTERN struct st_mysql_plugin	i_s_innodb_buffer_pool_pages_blob =
 
 	/* plugin author (for SHOW PLUGINS) */
 	/* const char* */
-	STRUCT_FLD(author, plugin_author),
+	STRUCT_FLD(author, "Percona"),
 
 	/* general descriptive text (for SHOW PLUGINS) */
 	/* const char* */
@@ -1259,7 +1259,7 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_buffer_pool_pages_blob_maria =
 
         /* plugin author (for SHOW PLUGINS) */
         /* const char* */
-        STRUCT_FLD(author, plugin_author),
+        STRUCT_FLD(author, "Percona"),
 
         /* general descriptive text (for SHOW PLUGINS) */
         /* const char* */
@@ -3158,7 +3158,7 @@ UNIV_INTERN struct st_mysql_plugin	i_s_innodb_rseg =
 
 	/* plugin author (for SHOW PLUGINS) */
 	/* const char* */
-	STRUCT_FLD(author, plugin_author),
+	STRUCT_FLD(author, "Percona"),
 
 	/* general descriptive text (for SHOW PLUGINS) */
 	/* const char* */
@@ -3207,7 +3207,7 @@ UNIV_INTERN struct st_maria_plugin      i_s_innodb_rseg_maria =
 
         /* plugin author (for SHOW PLUGINS) */
         /* const char* */
-        STRUCT_FLD(author, plugin_author),
+        STRUCT_FLD(author, "Percona"),
 
         /* general descriptive text (for SHOW PLUGINS) */
         /* const char* */
@@ -3246,6 +3246,189 @@ UNIV_INTERN struct st_maria_plugin      i_s_innodb_rseg_maria =
 
 /***********************************************************************
 */
+static ST_FIELD_INFO	i_s_innodb_admin_command_info[] =
+{
+	{STRUCT_FLD(field_name,		"result_message"),
+	 STRUCT_FLD(field_length,	1024),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+	END_OF_ST_FIELD_INFO
+};
+
+#ifndef INNODB_COMPATIBILITY_HOOKS
+#error InnoDB needs MySQL to be built with #define INNODB_COMPATIBILITY_HOOKS
+#endif
+
+extern "C" {
+char **thd_query(MYSQL_THD thd);
+}
+
+static
+int
+i_s_innodb_admin_command_fill(
+/*==========================*/
+	THD*		thd,
+	TABLE_LIST*	tables,
+	COND*		cond)
+{
+	TABLE*	i_s_table	= (TABLE *) tables->table;
+	char**	query_str;
+	char*	ptr;
+	char	quote	= '\0';
+	const char*	command_head = "XTRA_";
+
+	DBUG_ENTER("i_s_innodb_admin_command_fill");
+
+	/* deny access to non-superusers */
+	if (check_global_access(thd, PROCESS_ACL)) {
+		DBUG_RETURN(0);
+	}
+
+	if(thd_sql_command(thd) != SQLCOM_SELECT) {
+		field_store_string(i_s_table->field[0],
+			"SELECT command is only accepted.");
+		goto end_func;
+	}
+
+	query_str = thd_query(thd);
+	ptr = *query_str;
+	
+	for (; *ptr; ptr++) {
+		if (*ptr == quote) {
+			quote = '\0';
+		} else if (quote) {
+		} else if (*ptr == '`' || *ptr == '"') {
+			quote = *ptr;
+		} else {
+			long	i;
+			for (i = 0; command_head[i]; i++) {
+				if (toupper((int)(unsigned char)(ptr[i]))
+				    != toupper((int)(unsigned char)
+				      (command_head[i]))) {
+					goto nomatch;
+				}
+			}
+			break;
+nomatch:
+			;
+		}
+	}
+
+	if (!*ptr) {
+		field_store_string(i_s_table->field[0],
+			"No XTRA_* command in the SQL statement."
+			" Please add /*!XTRA_xxxx*/ to the SQL.");
+		goto end_func;
+	}
+
+	if (!strncasecmp("XTRA_HELLO", ptr, 10)) {
+		/* This is example command XTRA_HELLO */
+
+		ut_print_timestamp(stderr);
+		fprintf(stderr, " InnoDB: administration command test for XtraDB"
+				" 'XTRA_HELLO' was detected.\n");
+
+		field_store_string(i_s_table->field[0],
+			"Hello!");
+		goto end_func;
+	}
+	else if (!strncasecmp("XTRA_LRU_DUMP", ptr, 13)) {
+		ut_print_timestamp(stderr);
+		fprintf(stderr, " InnoDB: administration command 'XTRA_LRU_DUMP'"
+				" was detected.\n");
+
+		if (buf_LRU_file_dump()) {
+			field_store_string(i_s_table->field[0],
+				"XTRA_LRU_DUMP was succeeded.");
+		} else {
+			field_store_string(i_s_table->field[0],
+				"XTRA_LRU_DUMP was failed.");
+		}
+
+		goto end_func;
+	}
+	else if (!strncasecmp("XTRA_LRU_RESTORE", ptr, 16)) {
+		ut_print_timestamp(stderr);
+		fprintf(stderr, " InnoDB: administration command 'XTRA_LRU_RESTORE'"
+				" was detected.\n");
+
+		if (buf_LRU_file_restore()) {
+			field_store_string(i_s_table->field[0],
+				"XTRA_LRU_RESTORE was succeeded.");
+		} else {
+			field_store_string(i_s_table->field[0],
+				"XTRA_LRU_RESTORE was failed.");
+		}
+
+		goto end_func;
+	}
+
+	field_store_string(i_s_table->field[0],
+		"Undefined XTRA_* command.");
+	goto end_func;
+
+end_func:
+	if (schema_table_store_record(thd, i_s_table)) {
+		DBUG_RETURN(1);
+	} else {
+		DBUG_RETURN(0);
+	}
+}
+
+static
+int
+i_s_innodb_admin_command_init(
+/*==========================*/
+	void*	p)
+{
+	DBUG_ENTER("i_s_innodb_admin_command_init");
+	ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p;
+
+	schema->fields_info = i_s_innodb_admin_command_info;
+	schema->fill_table = i_s_innodb_admin_command_fill;
+
+	DBUG_RETURN(0);
+}
+
+UNIV_INTERN struct st_mysql_plugin	i_s_innodb_admin_command =
+{
+	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
+	STRUCT_FLD(info, &i_s_info),
+	STRUCT_FLD(name, "XTRADB_ADMIN_COMMAND"),
+	STRUCT_FLD(author, "Percona"),
+	STRUCT_FLD(descr, "XtraDB specific command acceptor"),
+	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
+	STRUCT_FLD(init, i_s_innodb_admin_command_init),
+	STRUCT_FLD(deinit, i_s_common_deinit),
+	STRUCT_FLD(version, 0x0100 /* 1.0 */),
+	STRUCT_FLD(status_vars, NULL),
+	STRUCT_FLD(system_vars, NULL),
+	STRUCT_FLD(__reserved1, NULL)
+};
+
+UNIV_INTERN struct st_maria_plugin      i_s_innodb_admin_command_maria =
+{
+        STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
+        STRUCT_FLD(info, &i_s_info),
+        STRUCT_FLD(name, "XTRADB_ADMIN_COMMAND"),
+        STRUCT_FLD(author, "Percona"),
+        STRUCT_FLD(descr, "XtraDB specific command acceptor"),
+        STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
+        STRUCT_FLD(init, i_s_innodb_admin_command_init),
+        STRUCT_FLD(deinit, i_s_common_deinit),
+        STRUCT_FLD(version, 0x0100 /* 1.0 */),
+        STRUCT_FLD(status_vars, NULL),
+        STRUCT_FLD(system_vars, NULL),
+        STRUCT_FLD(version_info, "1.0"),
+        STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE)
+};
+
+/***********************************************************************
+*/
 static ST_FIELD_INFO	i_s_innodb_table_stats_info[] =
 {
 	{STRUCT_FLD(field_name,		"table_schema"),
@@ -3561,7 +3744,7 @@ UNIV_INTERN struct st_mysql_plugin	i_s_innodb_table_stats =
 	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
 	STRUCT_FLD(info, &i_s_info),
 	STRUCT_FLD(name, "INNODB_TABLE_STATS"),
-	STRUCT_FLD(author, plugin_author),
+	STRUCT_FLD(author, "Percona"),
 	STRUCT_FLD(descr, "InnoDB table statistics in memory"),
 	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
 	STRUCT_FLD(init, i_s_innodb_table_stats_init),
@@ -3577,7 +3760,7 @@ UNIV_INTERN struct st_maria_plugin      i_s_innodb_table_stats_maria =
         STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
         STRUCT_FLD(info, &i_s_info),
         STRUCT_FLD(name, "INNODB_TABLE_STATS"),
-        STRUCT_FLD(author, plugin_author),
+        STRUCT_FLD(author, "Percona"),
         STRUCT_FLD(descr, "InnoDB table statistics in memory"),
         STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
         STRUCT_FLD(init, i_s_innodb_table_stats_init),
@@ -3594,7 +3777,7 @@ UNIV_INTERN struct st_mysql_plugin	i_s_innodb_index_stats =
 	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
 	STRUCT_FLD(info, &i_s_info),
 	STRUCT_FLD(name, "INNODB_INDEX_STATS"),
-	STRUCT_FLD(author, plugin_author),
+	STRUCT_FLD(author, "Percona"),
 	STRUCT_FLD(descr, "InnoDB index statistics in memory"),
 	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
 	STRUCT_FLD(init, i_s_innodb_index_stats_init),
@@ -3610,7 +3793,7 @@ UNIV_INTERN struct st_maria_plugin      i_s_innodb_index_stats_maria =
         STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
         STRUCT_FLD(info, &i_s_info),
         STRUCT_FLD(name, "INNODB_INDEX_STATS"),
-        STRUCT_FLD(author, plugin_author),
+        STRUCT_FLD(author, "Percona"),
         STRUCT_FLD(descr, "InnoDB index statistics in memory"),
         STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
         STRUCT_FLD(init, i_s_innodb_index_stats_init),
@@ -3622,188 +3805,6 @@ UNIV_INTERN struct st_maria_plugin      i_s_innodb_index_stats_maria =
         STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE)
 };
 
-/***********************************************************************
-*/
-static ST_FIELD_INFO	i_s_innodb_admin_command_info[] =
-{
-	{STRUCT_FLD(field_name,		"result_message"),
-	 STRUCT_FLD(field_length,	1024),
-	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
-	 STRUCT_FLD(value,		0),
-	 STRUCT_FLD(field_flags,	0),
-	 STRUCT_FLD(old_name,		""),
-	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
-
-	END_OF_ST_FIELD_INFO
-};
-
-#ifndef INNODB_COMPATIBILITY_HOOKS
-#error InnoDB needs MySQL to be built with #define INNODB_COMPATIBILITY_HOOKS
-#endif
-
-extern "C" {
-char **thd_query(MYSQL_THD thd);
-}
-
-static
-int
-i_s_innodb_admin_command_fill(
-/*==========================*/
-	THD*		thd,
-	TABLE_LIST*	tables,
-	COND*		cond)
-{
-	TABLE*	i_s_table	= (TABLE *) tables->table;
-	char**	query_str;
-	char*	ptr;
-	char	quote	= '\0';
-	const char*	command_head = "XTRA_";
-
-	DBUG_ENTER("i_s_innodb_admin_command_fill");
-
-	/* deny access to non-superusers */
-	if (check_global_access(thd, PROCESS_ACL)) {
-		DBUG_RETURN(0);
-	}
-
-	if(thd_sql_command(thd) != SQLCOM_SELECT) {
-		field_store_string(i_s_table->field[0],
-			"SELECT command is only accepted.");
-		goto end_func;
-	}
-
-	query_str = thd_query(thd);
-	ptr = *query_str;
-	
-	for (; *ptr; ptr++) {
-		if (*ptr == quote) {
-			quote = '\0';
-		} else if (quote) {
-		} else if (*ptr == '`' || *ptr == '"') {
-			quote = *ptr;
-		} else {
-			long	i;
-			for (i = 0; command_head[i]; i++) {
-				if (toupper((int)(unsigned char)(ptr[i]))
-				    != toupper((int)(unsigned char)
-				      (command_head[i]))) {
-					goto nomatch;
-				}
-			}
-			break;
-nomatch:
-			;
-		}
-	}
-
-	if (!*ptr) {
-		field_store_string(i_s_table->field[0],
-			"No XTRA_* command in the SQL statement."
-			" Please add /*!XTRA_xxxx*/ to the SQL.");
-		goto end_func;
-	}
-
-	if (!strncasecmp("XTRA_HELLO", ptr, 10)) {
-		/* This is example command XTRA_HELLO */
-
-		ut_print_timestamp(stderr);
-		fprintf(stderr, " InnoDB: administration command test for XtraDB"
-				" 'XTRA_HELLO' was detected.\n");
-
-		field_store_string(i_s_table->field[0],
-			"Hello!");
-		goto end_func;
-	}
-	else if (!strncasecmp("XTRA_LRU_DUMP", ptr, 13)) {
-		ut_print_timestamp(stderr);
-		fprintf(stderr, " InnoDB: administration command 'XTRA_LRU_DUMP'"
-				" was detected.\n");
-
-		if (buf_LRU_file_dump()) {
-			field_store_string(i_s_table->field[0],
-				"XTRA_LRU_DUMP was succeeded.");
-		} else {
-			field_store_string(i_s_table->field[0],
-				"XTRA_LRU_DUMP was failed.");
-		}
-
-		goto end_func;
-	}
-	else if (!strncasecmp("XTRA_LRU_RESTORE", ptr, 16)) {
-		ut_print_timestamp(stderr);
-		fprintf(stderr, " InnoDB: administration command 'XTRA_LRU_RESTORE'"
-				" was detected.\n");
-
-		if (buf_LRU_file_restore()) {
-			field_store_string(i_s_table->field[0],
-				"XTRA_LRU_RESTORE was succeeded.");
-		} else {
-			field_store_string(i_s_table->field[0],
-				"XTRA_LRU_RESTORE was failed.");
-		}
-
-		goto end_func;
-	}
-
-	field_store_string(i_s_table->field[0],
-		"Undefined XTRA_* command.");
-	goto end_func;
-
-end_func:
-	if (schema_table_store_record(thd, i_s_table)) {
-		DBUG_RETURN(1);
-	} else {
-		DBUG_RETURN(0);
-	}
-}
-
-static
-int
-i_s_innodb_admin_command_init(
-/*==========================*/
-	void*	p)
-{
-	DBUG_ENTER("i_s_innodb_admin_command_init");
-	ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p;
-
-	schema->fields_info = i_s_innodb_admin_command_info;
-	schema->fill_table = i_s_innodb_admin_command_fill;
-
-	DBUG_RETURN(0);
-}
-
-UNIV_INTERN struct st_mysql_plugin	i_s_innodb_admin_command =
-{
-	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-	STRUCT_FLD(info, &i_s_info),
-	STRUCT_FLD(name, "XTRADB_ADMIN_COMMAND"),
-	STRUCT_FLD(author, plugin_author),
-	STRUCT_FLD(descr, "XtraDB specific command acceptor"),
-	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-	STRUCT_FLD(init, i_s_innodb_admin_command_init),
-	STRUCT_FLD(deinit, i_s_common_deinit),
-	STRUCT_FLD(version, 0x0100 /* 1.0 */),
-	STRUCT_FLD(status_vars, NULL),
-	STRUCT_FLD(system_vars, NULL),
-	STRUCT_FLD(__reserved1, NULL)
-};
-
-UNIV_INTERN struct st_maria_plugin      i_s_innodb_admin_command_maria =
-{
-        STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-        STRUCT_FLD(info, &i_s_info),
-        STRUCT_FLD(name, "XTRADB_ADMIN_COMMAND"),
-        STRUCT_FLD(author, plugin_author),
-        STRUCT_FLD(descr, "XtraDB specific command acceptor"),
-        STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-        STRUCT_FLD(init, i_s_innodb_admin_command_init),
-        STRUCT_FLD(deinit, i_s_common_deinit),
-        STRUCT_FLD(version, 0x0100 /* 1.0 */),
-        STRUCT_FLD(status_vars, NULL),
-        STRUCT_FLD(system_vars, NULL),
-        STRUCT_FLD(version_info, "1.0"),
-        STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE)
-};
 
 static ST_FIELD_INFO	i_s_innodb_sys_tables_info[] =
 {
@@ -4340,15 +4341,14 @@ i_s_innodb_schema_table_fill(
 		rec = btr_pcur_get_rec(&pcur);
 		if (!btr_pcur_is_on_user_rec(&pcur)) {
 			/* end of index */
-			btr_pcur_close(&pcur);
-			mtr_commit(&mtr);
 			break;
 		}
+
+		btr_pcur_store_position(&pcur, &mtr);
+
 		if (rec_get_deleted_flag(rec, 0)) {
 			/* record marked as deleted */
-			btr_pcur_close(&pcur);
-			mtr_commit(&mtr);
-			continue;
+			goto next_record;
 		}
 
 		if (id == 0) {
@@ -4359,33 +4359,23 @@ i_s_innodb_schema_table_fill(
 			status = copy_sys_stats_rec(table, index, rec);
 		}
 		if (status) {
-			btr_pcur_close(&pcur);
-			mtr_commit(&mtr);
 			break;
 		}
 
-#if 0
-		btr_pcur_store_position(&pcur, &mtr);
-		mtr_commit(&mtr);
-
 		status = schema_table_store_record(thd, table);
 		if (status) {
-			btr_pcur_close(&pcur);
 			break;
 		}
+next_record:
+		mtr_commit(&mtr);
 
 		mtr_start(&mtr);
 		btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr);
-#else
-		status = schema_table_store_record(thd, table);
-		if (status) {
-			btr_pcur_close(&pcur);
-			mtr_commit(&mtr);
-			break;
-		}
-#endif
 	}
 
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+
 	mutex_exit(&(dict_sys->mutex));
 
 	DBUG_RETURN(status);
@@ -4441,7 +4431,7 @@ UNIV_INTERN struct st_mysql_plugin   i_s_innodb_sys_tables =
 	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
 	STRUCT_FLD(info, &i_s_info),
 	STRUCT_FLD(name, "INNODB_SYS_TABLES"),
-	STRUCT_FLD(author, plugin_author),
+	STRUCT_FLD(author, "Percona"),
 	STRUCT_FLD(descr, "InnoDB SYS_TABLES table"),
 	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
 	STRUCT_FLD(init, i_s_innodb_sys_tables_init),
@@ -4457,7 +4447,7 @@ UNIV_INTERN struct st_maria_plugin   i_s_innodb_sys_tables_maria =
 	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
 	STRUCT_FLD(info, &i_s_info),
 	STRUCT_FLD(name, "INNODB_SYS_TABLES"),
-	STRUCT_FLD(author, plugin_author),
+	STRUCT_FLD(author, "Percona"),
 	STRUCT_FLD(descr, "InnoDB SYS_TABLES table"),
 	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
 	STRUCT_FLD(init, i_s_innodb_sys_tables_init),
@@ -4474,7 +4464,7 @@ UNIV_INTERN struct st_mysql_plugin   i_s_innodb_sys_indexes =
 	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
 	STRUCT_FLD(info, &i_s_info),
 	STRUCT_FLD(name, "INNODB_SYS_INDEXES"),
-	STRUCT_FLD(author, plugin_author),
+	STRUCT_FLD(author, "Percona"),
 	STRUCT_FLD(descr, "InnoDB SYS_INDEXES table"),
 	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
 	STRUCT_FLD(init, i_s_innodb_sys_indexes_init),
@@ -4490,7 +4480,7 @@ UNIV_INTERN struct st_maria_plugin   i_s_innodb_sys_indexes_maria =
 	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
 	STRUCT_FLD(info, &i_s_info),
 	STRUCT_FLD(name, "INNODB_SYS_INDEXES"),
-	STRUCT_FLD(author, plugin_author),
+	STRUCT_FLD(author, "Percona"),
 	STRUCT_FLD(descr, "InnoDB SYS_INDEXES table"),
 	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
 	STRUCT_FLD(init, i_s_innodb_sys_indexes_init),
@@ -4507,7 +4497,7 @@ UNIV_INTERN struct st_mysql_plugin   i_s_innodb_sys_stats =
 	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
 	STRUCT_FLD(info, &i_s_info),
 	STRUCT_FLD(name, "INNODB_SYS_STATS"),
-	STRUCT_FLD(author, plugin_author),
+	STRUCT_FLD(author, "Percona"),
 	STRUCT_FLD(descr, "InnoDB SYS_STATS table"),
 	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
 	STRUCT_FLD(init, i_s_innodb_sys_stats_init),
@@ -4523,7 +4513,7 @@ UNIV_INTERN struct st_maria_plugin   i_s_innodb_sys_stats_maria =
 	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
 	STRUCT_FLD(info, &i_s_info),
 	STRUCT_FLD(name, "INNODB_SYS_STATS"),
-	STRUCT_FLD(author, plugin_author),
+	STRUCT_FLD(author, "Percona"),
 	STRUCT_FLD(descr, "InnoDB SYS_STATS table"),
 	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
 	STRUCT_FLD(init, i_s_innodb_sys_stats_init),
diff --git a/storage/xtradb/handler/innodb_patch_info.h b/storage/xtradb/handler/innodb_patch_info.h
index e68f12d0fec..38b97411340 100644
--- a/storage/xtradb/handler/innodb_patch_info.h
+++ b/storage/xtradb/handler/innodb_patch_info.h
@@ -47,6 +47,5 @@ struct innodb_enhancement {
 {"innodb_fast_checksum","Using the checksum on 32bit-unit calculation","incompatible for unpatched ver.","http://www.percona.com/docs/wiki/percona-xtradb"},
 {"innodb_files_extend","allow >4GB transaction log files, and can vary universal page size of datafiles","incompatible for unpatched ver.","http://www.percona.com/docs/wiki/percona-xtradb"},
 {"innodb_sys_tables_sys_indexes","Expose InnoDB SYS_TABLES and SYS_INDEXES schema tables","","http://www.percona.com/docs/wiki/percona-xtradb"},
-{"innodb_buffer_pool_shm","Put buffer pool contents to shared memory segment and reuse it at clean restart [experimental]","","http://www.percona.com/docs/wiki/percona-xtradb"},
 {NULL, NULL, NULL, NULL}
 };
diff --git a/storage/xtradb/ibuf/ibuf0ibuf.c b/storage/xtradb/ibuf/ibuf0ibuf.c
index 3f741da60bb..64dc9a5591d 100644
--- a/storage/xtradb/ibuf/ibuf0ibuf.c
+++ b/storage/xtradb/ibuf/ibuf0ibuf.c
@@ -2613,6 +2613,8 @@ ibuf_insert_low(
 
 	ut_a(trx_sys_multiple_tablespace_format);
 
+	ut_ad(!(thr_get_trx(thr)->fake_changes));
+
 	do_merge = FALSE;
 
 	mutex_enter(&ibuf_mutex);
diff --git a/storage/xtradb/include/btr0btr.h b/storage/xtradb/include/btr0btr.h
index dde3a0bab69..1fe40965c0f 100644
--- a/storage/xtradb/include/btr0btr.h
+++ b/storage/xtradb/include/btr0btr.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -81,6 +81,91 @@ UNIQUE definition on secondary indexes when we decide if we can use
 the insert buffer to speed up inserts */
 #define BTR_IGNORE_SEC_UNIQUE	2048
 
+#ifdef UNIV_BLOB_DEBUG
+# include "ut0rbt.h"
+/** An index->blobs entry for keeping track of off-page column references */
+struct btr_blob_dbg_struct
+{
+	unsigned	blob_page_no:32;	/*!< first BLOB page number */
+	unsigned	ref_page_no:32;		/*!< referring page number */
+	unsigned	ref_heap_no:16;		/*!< referring heap number */
+	unsigned	ref_field_no:10;	/*!< referring field number */
+	unsigned	owner:1;		/*!< TRUE if BLOB owner */
+	unsigned	always_owner:1;		/*!< TRUE if always
+						has been the BLOB owner;
+						reset to TRUE on B-tree
+						page splits and merges */
+	unsigned	del:1;			/*!< TRUE if currently
+						delete-marked */
+};
+
+/**************************************************************//**
+Add a reference to an off-page column to the index->blobs map. */
+UNIV_INTERN
+void
+btr_blob_dbg_add_blob(
+/*==================*/
+	const rec_t*	rec,		/*!< in: clustered index record */
+	ulint		field_no,	/*!< in: number of off-page column */
+	ulint		page_no,	/*!< in: start page of the column */
+	dict_index_t*	index,		/*!< in/out: index tree */
+	const char*	ctx)		/*!< in: context (for logging) */
+	__attribute__((nonnull));
+/**************************************************************//**
+Display the references to off-page columns.
+This function is to be called from a debugger,
+for example when a breakpoint on ut_dbg_assertion_failed is hit. */
+UNIV_INTERN
+void
+btr_blob_dbg_print(
+/*===============*/
+	const dict_index_t*	index)	/*!< in: index tree */
+	__attribute__((nonnull));
+/**************************************************************//**
+Check that there are no references to off-page columns from or to
+the given page. Invoked when freeing or clearing a page.
+@return TRUE when no orphan references exist */
+UNIV_INTERN
+ibool
+btr_blob_dbg_is_empty(
+/*==================*/
+	dict_index_t*	index,		/*!< in: index */
+	ulint		page_no)	/*!< in: page number */
+	__attribute__((nonnull, warn_unused_result));
+
+/**************************************************************//**
+Modify the 'deleted' flag of a record. */
+UNIV_INTERN
+void
+btr_blob_dbg_set_deleted_flag(
+/*==========================*/
+	const rec_t*		rec,	/*!< in: record */
+	dict_index_t*		index,	/*!< in/out: index */
+	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
+	ibool			del)	/*!< in: TRUE=deleted, FALSE=exists */
+	__attribute__((nonnull));
+/**************************************************************//**
+Change the ownership of an off-page column. */
+UNIV_INTERN
+void
+btr_blob_dbg_owner(
+/*===============*/
+	const rec_t*		rec,	/*!< in: record */
+	dict_index_t*		index,	/*!< in/out: index */
+	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
+	ulint			i,	/*!< in: ith field in rec */
+	ibool			own)	/*!< in: TRUE=owned, FALSE=disowned */
+	__attribute__((nonnull));
+/** Assert that there are no BLOB references to or from the given page. */
+# define btr_blob_dbg_assert_empty(index, page_no)	\
+	ut_a(btr_blob_dbg_is_empty(index, page_no))
+#else /* UNIV_BLOB_DEBUG */
+# define btr_blob_dbg_add_blob(rec, field_no, page, index, ctx)	((void) 0)
+# define btr_blob_dbg_set_deleted_flag(rec, index, offsets, del)((void) 0)
+# define btr_blob_dbg_owner(rec, index, offsets, i, val)	((void) 0)
+# define btr_blob_dbg_assert_empty(index, page_no)		((void) 0)
+#endif /* UNIV_BLOB_DEBUG */
+
 /**************************************************************//**
 Gets the root node of a tree and x-latches it.
 @return	root page, x-latched */
@@ -123,6 +208,17 @@ btr_block_get_func(
 @return the uncompressed page frame */
 # define btr_page_get(space,zip_size,page_no,mode,mtr) \
 	buf_block_get_frame(btr_block_get(space,zip_size,page_no,mode,mtr))
+/**************************************************************//**
+Sets the index id field of a page. */
+UNIV_INLINE
+void
+btr_page_set_index_id(
+/*==================*/
+	page_t*		page,	/*!< in: page to be created */
+	page_zip_des_t*	page_zip,/*!< in: compressed page whose uncompressed
+				part will be updated, or NULL */
+	dulint		id,	/*!< in: index id */
+	mtr_t*		mtr);	/*!< in: mtr */
 #endif /* !UNIV_HOTBACKUP */
 /**************************************************************//**
 Gets the index id field of a page.
@@ -160,6 +256,17 @@ btr_page_get_next(
 	const page_t*	page,	/*!< in: index page */
 	mtr_t*		mtr);	/*!< in: mini-transaction handle */
 /********************************************************//**
+Sets the next index page field. */
+UNIV_INLINE
+void
+btr_page_set_next(
+/*==============*/
+	page_t*		page,	/*!< in: index page */
+	page_zip_des_t*	page_zip,/*!< in: compressed page whose uncompressed
+				part will be updated, or NULL */
+	ulint		next,	/*!< in: next page number */
+	mtr_t*		mtr);	/*!< in: mini-transaction handle */
+/********************************************************//**
 Gets the previous index page number.
 @return	prev page number */
 UNIV_INLINE
@@ -168,6 +275,17 @@ btr_page_get_prev(
 /*==============*/
 	const page_t*	page,	/*!< in: index page */
 	mtr_t*		mtr);	/*!< in: mini-transaction handle */
+/********************************************************//**
+Sets the previous index page field. */
+UNIV_INLINE
+void
+btr_page_set_prev(
+/*==============*/
+	page_t*		page,	/*!< in: index page */
+	page_zip_des_t*	page_zip,/*!< in: compressed page whose uncompressed
+				part will be updated, or NULL */
+	ulint		prev,	/*!< in: previous page number */
+	mtr_t*		mtr);	/*!< in: mini-transaction handle */
 /*************************************************************//**
 Gets pointer to the previous user record in the tree. It is assumed
 that the caller has appropriate latches on the page and its neighbor.
@@ -213,6 +331,18 @@ btr_node_ptr_get_child_page_no(
 /*===========================*/
 	const rec_t*	rec,	/*!< in: node pointer record */
 	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
+/**************************************************************//**
+Creates a new index page (not the root, and also not
+used in page reorganization).  @see btr_page_empty(). */
+UNIV_INTERN
+void
+btr_page_create(
+/*============*/
+	buf_block_t*	block,	/*!< in/out: page to be created */
+	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
+	dict_index_t*	index,	/*!< in: index */
+	ulint		level,	/*!< in: the B-tree level of the page */
+	mtr_t*		mtr);	/*!< in: mtr */
 /************************************************************//**
 Creates the root node for a new index tree.
 @return	page number of the created root, FIL_NULL if did not succeed */
@@ -283,6 +413,17 @@ btr_page_reorganize(
 	dict_index_t*	index,	/*!< in: record descriptor */
 	mtr_t*		mtr);	/*!< in: mtr */
 /*************************************************************//**
+Empties an index page.  @see btr_page_create(). */
+UNIV_INTERN
+void
+btr_page_empty(
+/*===========*/
+	buf_block_t*	block,	/*!< in: page to be emptied */
+	page_zip_des_t*	page_zip,/*!< out: compressed page, or NULL */
+	dict_index_t*	index,	/*!< in: index of the page */
+	ulint		level,	/*!< in: the B-tree level of the page */
+	mtr_t*		mtr);	/*!< in: mtr */
+/*************************************************************//**
 Decides if the page should be split at the convergence point of
 inserts converging to left.
 @return	TRUE if split recommended */
@@ -341,6 +482,20 @@ btr_insert_on_non_leaf_level_func(
 # define btr_insert_on_non_leaf_level(i,l,t,m)				\
 	btr_insert_on_non_leaf_level_func(i,l,t,__FILE__,__LINE__,m)
 #endif /* !UNIV_HOTBACKUP */
+/**************************************************************//**
+Attaches the halves of an index page on the appropriate level in an
+index tree. */
+UNIV_INTERN
+void
+btr_attach_half_pages(
+/*==================*/
+	dict_index_t*	index,		/*!< in: the index tree */
+	buf_block_t*	block,		/*!< in/out: page to be split */
+	const rec_t*	split_rec,	/*!< in: first record on upper
+					half page */
+	buf_block_t*	new_block,	/*!< in/out: the new half page */
+	ulint		direction,	/*!< in: FSP_UP or FSP_DOWN */
+	mtr_t*		mtr);		/*!< in: mtr */
 /****************************************************************//**
 Sets a record as the predefined minimum record. */
 UNIV_INTERN
@@ -385,11 +540,14 @@ UNIV_INTERN
 ibool
 btr_compress(
 /*=========*/
-	btr_cur_t*	cursor,	/*!< in: cursor on the page to merge or lift;
-				the page must not be empty: in record delete
-				use btr_discard_page if the page would become
-				empty */
-	mtr_t*		mtr);	/*!< in: mtr */
+	btr_cur_t*	cursor,	/*!< in/out: cursor on the page to merge
+				or lift; the page must not be empty:
+				when deleting records, use btr_discard_page()
+				if the page would become empty */
+	ibool		adjust,	/*!< in: TRUE if should adjust the
+				cursor position even if compression occurs */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+	__attribute__((nonnull));
 /*************************************************************//**
 Discards a page from a B-tree. This is used to remove the last record from
 a B-tree page: the whole page must be removed at the same time. This cannot
diff --git a/storage/xtradb/include/btr0cur.h b/storage/xtradb/include/btr0cur.h
index ece3621fa97..6f4ce95d72f 100644
--- a/storage/xtradb/include/btr0cur.h
+++ b/storage/xtradb/include/btr0cur.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -36,6 +36,9 @@ Created 10/16/1994 Heikki Tuuri
 #define BTR_NO_LOCKING_FLAG	2	/* do no record lock checking */
 #define BTR_KEEP_SYS_FLAG	4	/* sys fields will be found from the
 					update vector or inserted entry */
+#define BTR_KEEP_POS_FLAG	8	/* btr_cur_pessimistic_update()
+					must keep cursor position when
+					moving columns to big_rec */
 
 #ifndef UNIV_HOTBACKUP
 #include "que0types.h"
@@ -309,7 +312,9 @@ btr_cur_pessimistic_update(
 /*=======================*/
 	ulint		flags,	/*!< in: undo logging, locking, and rollback
 				flags */
-	btr_cur_t*	cursor,	/*!< in: cursor on the record to update */
+	btr_cur_t*	cursor,	/*!< in/out: cursor on the record to update;
+				cursor may become invalid if *big_rec == NULL
+				|| !(flags & BTR_KEEP_POS_FLAG) */
 	mem_heap_t**	heap,	/*!< in/out: pointer to memory heap, or NULL */
 	big_rec_t**	big_rec,/*!< out: big rec vector whose fields have to
 				be stored externally by the caller, or NULL */
@@ -321,6 +326,16 @@ btr_cur_pessimistic_update(
 	que_thr_t*	thr,	/*!< in: query thread */
 	mtr_t*		mtr);	/*!< in: mtr; must be committed before
 				latching any further pages */
+/*****************************************************************
+Commits and restarts a mini-transaction so that it will retain an
+x-lock on index->lock and the cursor page. */
+UNIV_INTERN
+void
+btr_cur_mtr_commit_and_start(
+/*=========================*/
+	btr_cur_t*	cursor,	/*!< in: cursor */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+	__attribute__((nonnull));
 /***********************************************************//**
 Marks a clustered index record deleted. Writes an undo log record to
 undo log on this delete marking. Writes in the trx id field the id
@@ -376,10 +391,13 @@ UNIV_INTERN
 ibool
 btr_cur_compress_if_useful(
 /*=======================*/
-	btr_cur_t*	cursor,	/*!< in: cursor on the page to compress;
+	btr_cur_t*	cursor,	/*!< in/out: cursor on the page to compress;
 				cursor does not stay valid if compression
 				occurs */
-	mtr_t*		mtr);	/*!< in: mtr */
+	ibool		adjust,	/*!< in: TRUE if should adjust the
+				cursor position even if compression occurs */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+	__attribute__((nonnull));
 /*******************************************************//**
 Removes the record on which the tree cursor is positioned. It is assumed
 that the mtr has an x-latch on the page where the cursor is positioned,
@@ -652,6 +670,11 @@ struct btr_path_struct{
 				order); value ULINT_UNDEFINED
 				denotes array end */
 	ulint	n_recs;		/*!< number of records on the page */
+	ulint	page_no;	/*!< no of the page containing the record */
+	ulint	page_level;	/*!< level of the page; if we later fetch
+				the page under page_no and it is on a different
+				level, then we know that the tree has been
+				reorganized */
 };
 
 #define BTR_PATH_ARRAY_N_SLOTS	250	/*!< size of path array (in slots) */
diff --git a/storage/xtradb/include/btr0cur.ic b/storage/xtradb/include/btr0cur.ic
index 280583f6ccf..c833b3e8572 100644
--- a/storage/xtradb/include/btr0cur.ic
+++ b/storage/xtradb/include/btr0cur.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -139,7 +139,7 @@ btr_cur_compress_recommendation(
 	btr_cur_t*	cursor,	/*!< in: btr cursor */
 	mtr_t*		mtr)	/*!< in: mtr */
 {
-	page_t*		page;
+	const page_t*		page;
 
 	ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
 				MTR_MEMO_PAGE_X_FIX));
diff --git a/storage/xtradb/include/btr0pcur.h b/storage/xtradb/include/btr0pcur.h
index 2334a266280..f59514d04b3 100644
--- a/storage/xtradb/include/btr0pcur.h
+++ b/storage/xtradb/include/btr0pcur.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -244,18 +244,6 @@ btr_pcur_restore_position_func(
 	mtr_t*		mtr);		/*!< in: mtr */
 #define btr_pcur_restore_position(l,cur,mtr)				\
 	btr_pcur_restore_position_func(l,cur,__FILE__,__LINE__,mtr)
-/**************************************************************//**
-If the latch mode of the cursor is BTR_LEAF_SEARCH or BTR_LEAF_MODIFY,
-releases the page latch and bufferfix reserved by the cursor.
-NOTE! In the case of BTR_LEAF_MODIFY, there should not exist changes
-made by the current mini-transaction to the data protected by the
-cursor latch, as then the latch must not be released until mtr_commit. */
-UNIV_INTERN
-void
-btr_pcur_release_leaf(
-/*==================*/
-	btr_pcur_t*	cursor, /*!< in: persistent cursor */
-	mtr_t*		mtr);	/*!< in: mtr */
 /*********************************************************//**
 Gets the rel_pos field for a cursor whose position has been stored.
 @return	BTR_PCUR_ON, ... */
@@ -282,10 +270,9 @@ btr_pcur_get_mtr(
 	btr_pcur_t*	cursor);	/*!< in: persistent cursor */
 /**************************************************************//**
 Commits the mtr and sets the pcur latch mode to BTR_NO_LATCHES,
-that is, the cursor becomes detached. If there have been modifications
-to the page where pcur is positioned, this can be used instead of
-btr_pcur_release_leaf. Function btr_pcur_store_position should be used
-before calling this, if restoration of cursor is wanted later. */
+that is, the cursor becomes detached.
+Function btr_pcur_store_position should be used before calling this,
+if restoration of cursor is wanted later. */
 UNIV_INLINE
 void
 btr_pcur_commit_specify_mtr(
diff --git a/storage/xtradb/include/btr0pcur.ic b/storage/xtradb/include/btr0pcur.ic
index 0c38797e6c5..0f9b969e7c5 100644
--- a/storage/xtradb/include/btr0pcur.ic
+++ b/storage/xtradb/include/btr0pcur.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -396,10 +396,9 @@ btr_pcur_move_to_next(
 
 /**************************************************************//**
 Commits the mtr and sets the pcur latch mode to BTR_NO_LATCHES,
-that is, the cursor becomes detached. If there have been modifications
-to the page where pcur is positioned, this can be used instead of
-btr_pcur_release_leaf. Function btr_pcur_store_position should be used
-before calling this, if restoration of cursor is wanted later. */
+that is, the cursor becomes detached.
+Function btr_pcur_store_position should be used before calling this,
+if restoration of cursor is wanted later. */
 UNIV_INLINE
 void
 btr_pcur_commit_specify_mtr(
diff --git a/storage/xtradb/include/btr0types.h b/storage/xtradb/include/btr0types.h
index ef4a6b04b34..07c06fb18d7 100644
--- a/storage/xtradb/include/btr0types.h
+++ b/storage/xtradb/include/btr0types.h
@@ -38,6 +38,131 @@ typedef struct btr_cur_struct		btr_cur_t;
 /** B-tree search information for the adaptive hash index */
 typedef struct btr_search_struct	btr_search_t;
 
+#ifdef UNIV_BLOB_DEBUG
+# include "buf0types.h"
+/** An index->blobs entry for keeping track of off-page column references */
+typedef struct btr_blob_dbg_struct btr_blob_dbg_t;
+
+/** Insert to index->blobs a reference to an off-page column.
+@param index	the index tree
+@param b	the reference
+@param ctx	context (for logging) */
+UNIV_INTERN
+void
+btr_blob_dbg_rbt_insert(
+/*====================*/
+	dict_index_t*		index,	/*!< in/out: index tree */
+	const btr_blob_dbg_t*	b,	/*!< in: the reference */
+	const char*		ctx)	/*!< in: context (for logging) */
+	__attribute__((nonnull));
+
+/** Remove from index->blobs a reference to an off-page column.
+@param index	the index tree
+@param b	the reference
+@param ctx	context (for logging) */
+UNIV_INTERN
+void
+btr_blob_dbg_rbt_delete(
+/*====================*/
+	dict_index_t*		index,	/*!< in/out: index tree */
+	const btr_blob_dbg_t*	b,	/*!< in: the reference */
+	const char*		ctx)	/*!< in: context (for logging) */
+	__attribute__((nonnull));
+
+/**************************************************************//**
+Add to index->blobs any references to off-page columns from a record.
+@return number of references added */
+UNIV_INTERN
+ulint
+btr_blob_dbg_add_rec(
+/*=================*/
+	const rec_t*	rec,	/*!< in: record */
+	dict_index_t*	index,	/*!< in/out: index */
+	const ulint*	offsets,/*!< in: offsets */
+	const char*	ctx)	/*!< in: context (for logging) */
+	__attribute__((nonnull));
+/**************************************************************//**
+Remove from index->blobs any references to off-page columns from a record.
+@return number of references removed */
+UNIV_INTERN
+ulint
+btr_blob_dbg_remove_rec(
+/*====================*/
+	const rec_t*	rec,	/*!< in: record */
+	dict_index_t*	index,	/*!< in/out: index */
+	const ulint*	offsets,/*!< in: offsets */
+	const char*	ctx)	/*!< in: context (for logging) */
+	__attribute__((nonnull));
+/**************************************************************//**
+Count and add to index->blobs any references to off-page columns
+from records on a page.
+@return number of references added */
+UNIV_INTERN
+ulint
+btr_blob_dbg_add(
+/*=============*/
+	const page_t*	page,	/*!< in: rewritten page */
+	dict_index_t*	index,	/*!< in/out: index */
+	const char*	ctx)	/*!< in: context (for logging) */
+	__attribute__((nonnull));
+/**************************************************************//**
+Count and remove from index->blobs any references to off-page columns
+from records on a page.
+Used when reorganizing a page, before copying the records.
+@return number of references removed */
+UNIV_INTERN
+ulint
+btr_blob_dbg_remove(
+/*================*/
+	const page_t*	page,	/*!< in: b-tree page */
+	dict_index_t*	index,	/*!< in/out: index */
+	const char*	ctx)	/*!< in: context (for logging) */
+	__attribute__((nonnull));
+/**************************************************************//**
+Restore in index->blobs any references to off-page columns.
+Used when page reorganize fails due to compressed page overflow. */
+UNIV_INTERN
+void
+btr_blob_dbg_restore(
+/*=================*/
+	const page_t*	npage,	/*!< in: page that failed to compress */
+	const page_t*	page,	/*!< in: copy of original page */
+	dict_index_t*	index,	/*!< in/out: index */
+	const char*	ctx)	/*!< in: context (for logging) */
+	__attribute__((nonnull));
+
+/** Operation that processes the BLOB references of an index record
+@param[in]	rec	record on index page
+@param[in,out]	index	the index tree of the record
+@param[in]	offsets	rec_get_offsets(rec,index)
+@param[in]	ctx	context (for logging)
+@return			number of BLOB references processed */
+typedef ulint (*btr_blob_dbg_op_f)
+(const rec_t* rec,dict_index_t* index,const ulint* offsets,const char* ctx);
+
+/**************************************************************//**
+Count and process all references to off-page columns on a page.
+@return number of references processed */
+UNIV_INTERN
+ulint
+btr_blob_dbg_op(
+/*============*/
+	const page_t*		page,	/*!< in: B-tree leaf page */
+	const rec_t*		rec,	/*!< in: record to start from
+					(NULL to process the whole page) */
+	dict_index_t*		index,	/*!< in/out: index */
+	const char*		ctx,	/*!< in: context (for logging) */
+	const btr_blob_dbg_op_f	op)	/*!< in: operation on records */
+	__attribute__((nonnull(1,3,4,5)));
+#else /* UNIV_BLOB_DEBUG */
+# define btr_blob_dbg_add_rec(rec, index, offsets, ctx)		((void) 0)
+# define btr_blob_dbg_add(page, index, ctx)			((void) 0)
+# define btr_blob_dbg_remove_rec(rec, index, offsets, ctx)	((void) 0)
+# define btr_blob_dbg_remove(page, index, ctx)			((void) 0)
+# define btr_blob_dbg_restore(npage, page, index, ctx)		((void) 0)
+# define btr_blob_dbg_op(page, rec, index, ctx, op)		((void) 0)
+#endif /* UNIV_BLOB_DEBUG */
+
 /** The size of a reference to data stored on a different page.
 The reference is stored at the end of the prefix of the field
 in the index record. */
diff --git a/storage/xtradb/include/buf0buddy.h b/storage/xtradb/include/buf0buddy.h
index 3a35f8e46e9..f4c3da8692d 100644
--- a/storage/xtradb/include/buf0buddy.h
+++ b/storage/xtradb/include/buf0buddy.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2006, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -37,25 +37,20 @@ Created December 2006 by Marko Makela
 /**********************************************************************//**
 Allocate a block.  The thread calling this function must hold
 buf_pool_mutex and must not hold buf_pool_zip_mutex or any
-block->mutex.  The buf_pool_mutex may only be released and reacquired
-if lru != NULL.  This function should only be used for allocating
-compressed page frames or control blocks (buf_page_t).  Allocated
-control blocks must be properly initialized immediately after
-buf_buddy_alloc() has returned the memory, before releasing
-buf_pool_mutex.
-@return	allocated block, possibly NULL if lru == NULL */
+block->mutex.  The buf_pool_mutex may be released and reacquired.
+This function should only be used for allocating compressed page frames.
+@return	allocated block, never NULL */
 UNIV_INLINE
 void*
 buf_buddy_alloc(
 /*============*/
-	ulint	size,	/*!< in: block size, up to UNIV_PAGE_SIZE */
+	ulint	size,	/*!< in: compressed page size
+			(between PAGE_ZIP_MIN_SIZE and UNIV_PAGE_SIZE) */
 	ibool*	lru,	/*!< in: pointer to a variable that will be assigned
 			TRUE if storage was allocated from the LRU list
-			and buf_pool_mutex was temporarily released,
-			or NULL if the LRU list should not be used */
+			and buf_pool_mutex was temporarily released */
 	ibool	have_page_hash_mutex)
-	__attribute__((malloc));
-
+	__attribute__((malloc, nonnull));
 /**********************************************************************//**
 Release a block. */
 UNIV_INLINE
diff --git a/storage/xtradb/include/buf0buddy.ic b/storage/xtradb/include/buf0buddy.ic
index 69659fb69d6..63241311e1f 100644
--- a/storage/xtradb/include/buf0buddy.ic
+++ b/storage/xtradb/include/buf0buddy.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2006, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -36,8 +36,8 @@ Created December 2006 by Marko Makela
 /**********************************************************************//**
 Allocate a block.  The thread calling this function must hold
 buf_pool_mutex and must not hold buf_pool_zip_mutex or any block->mutex.
-The buf_pool_mutex may only be released and reacquired if lru != NULL.
-@return	allocated block, possibly NULL if lru==NULL */
+The buf_pool_mutex may be released and reacquired.
+@return	allocated block, never NULL */
 UNIV_INTERN
 void*
 buf_buddy_alloc_low(
@@ -46,10 +46,9 @@ buf_buddy_alloc_low(
 			or BUF_BUDDY_SIZES */
 	ibool*	lru,	/*!< in: pointer to a variable that will be assigned
 			TRUE if storage was allocated from the LRU list
-			and buf_pool_mutex was temporarily released,
-			or NULL if the LRU list should not be used */
+			and buf_pool_mutex was temporarily released */
 	ibool	have_page_hash_mutex)
-	__attribute__((malloc));
+	__attribute__((malloc, nonnull));
 
 /**********************************************************************//**
 Deallocate a block. */
@@ -76,6 +75,8 @@ buf_buddy_get_slot(
 	ulint	i;
 	ulint	s;
 
+	ut_ad(size >= PAGE_ZIP_MIN_SIZE);
+
 	for (i = 0, s = BUF_BUDDY_LOW; s < size; i++, s <<= 1) {
 	}
 
@@ -86,27 +87,26 @@ buf_buddy_get_slot(
 /**********************************************************************//**
 Allocate a block.  The thread calling this function must hold
 buf_pool_mutex and must not hold buf_pool_zip_mutex or any
-block->mutex.  The buf_pool_mutex may only be released and reacquired
-if lru != NULL.  This function should only be used for allocating
-compressed page frames or control blocks (buf_page_t).  Allocated
-control blocks must be properly initialized immediately after
-buf_buddy_alloc() has returned the memory, before releasing
-buf_pool_mutex.
-@return	allocated block, possibly NULL if lru == NULL */
+block->mutex.  The buf_pool_mutex may be released and reacquired.
+This function should only be used for allocating compressed page frames.
+@return	allocated block, never NULL */
 UNIV_INLINE
 void*
 buf_buddy_alloc(
 /*============*/
-	ulint	size,	/*!< in: block size, up to UNIV_PAGE_SIZE */
+	ulint	size,	/*!< in: compressed page size
+			(between PAGE_ZIP_MIN_SIZE and UNIV_PAGE_SIZE) */
 	ibool*	lru,	/*!< in: pointer to a variable that will be assigned
 			TRUE if storage was allocated from the LRU list
-			and buf_pool_mutex was temporarily released,
-			or NULL if the LRU list should not be used */
+			and buf_pool_mutex was temporarily released */
 	ibool	have_page_hash_mutex)
 {
 	//ut_ad(buf_pool_mutex_own());
+	ut_ad(ut_is_2pow(size));
+	ut_ad(size >= PAGE_ZIP_MIN_SIZE);
+	ut_ad(size <= UNIV_PAGE_SIZE);
 
-	return(buf_buddy_alloc_low(buf_buddy_get_slot(size), lru, have_page_hash_mutex));
+	return((byte*) buf_buddy_alloc_low(buf_buddy_get_slot(size), lru, have_page_hash_mutex));
 }
 
 /**********************************************************************//**
@@ -121,6 +121,9 @@ buf_buddy_free(
 	ibool	have_page_hash_mutex)
 {
 	//ut_ad(buf_pool_mutex_own());
+	ut_ad(ut_is_2pow(size));
+	ut_ad(size >= PAGE_ZIP_MIN_SIZE);
+	ut_ad(size <= UNIV_PAGE_SIZE);
 
 	if (!have_page_hash_mutex) {
 		mutex_enter(&LRU_list_mutex);
diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h
index bc0e9170281..838dd7f3900 100644
--- a/storage/xtradb/include/buf0buf.h
+++ b/storage/xtradb/include/buf0buf.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -36,12 +36,13 @@ Created 11/5/1995 Heikki Tuuri
 #include "ut0rbt.h"
 #ifndef UNIV_HOTBACKUP
 #include "os0proc.h"
-#include "srv0srv.h"
 
 /** @name Modes for buf_page_get_gen */
 /* @{ */
 #define BUF_GET			10	/*!< get always */
 #define	BUF_GET_IF_IN_POOL	11	/*!< get if in pool */
+#define BUF_PEEK_IF_IN_POOL	12	/*!< get if in pool, do not make
+					the block young in the LRU list */
 #define BUF_GET_NO_LATCH	14	/*!< get and bufferfix, but
 					set no latch; we have
 					separated this case, because
@@ -140,12 +141,6 @@ buf_relocate(
 				BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
 	buf_page_t*	dpage)	/*!< in/out: destination control block */
 	__attribute__((nonnull));
-/********************************************************************//**
-Resizes the buffer pool. */
-UNIV_INTERN
-void
-buf_pool_resize(void);
-/*=================*/
 /*********************************************************************//**
 Gets the current size of buffer buf_pool in bytes.
 @return	size in bytes */
@@ -162,6 +157,23 @@ ib_uint64_t
 buf_pool_get_oldest_modification(void);
 /*==================================*/
 /********************************************************************//**
+Allocates a buf_page_t descriptor. This function must succeed. In case
+of failure we assert in this function. */
+UNIV_INLINE
+buf_page_t*
+buf_page_alloc_descriptor(void)
+/*===========================*/
+	__attribute__((malloc));
+/********************************************************************//**
+Free a buf_page_t descriptor. */
+UNIV_INLINE
+void
+buf_page_free_descriptor(
+/*=====================*/
+	buf_page_t*	bpage)	/*!< in: bpage descriptor to free. */
+	__attribute__((nonnull));
+
+/********************************************************************//**
 Allocates a buffer block.
 @return	own: the allocated block, in state BUF_BLOCK_MEMORY */
 UNIV_INLINE
@@ -285,7 +297,7 @@ buf_page_get_gen(
 	ulint		rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
 	buf_block_t*	guess,	/*!< in: guessed block or NULL */
 	ulint		mode,	/*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
-				BUF_GET_NO_LATCH */
+				BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH */
 	const char*	file,	/*!< in: file name */
 	ulint		line,	/*!< in: line where called */
 	mtr_t*		mtr);	/*!< in: mini-transaction */
@@ -415,6 +427,18 @@ buf_block_get_freed_page_clock(
 	__attribute__((pure));
 
 /********************************************************************//**
+Tells if a block is still close enough to the MRU end of the LRU list
+meaning that it is not in danger of getting evicted and also implying
+that it has been accessed recently.
+Note that this is for heuristics only and does not reserve the buffer
+pool mutex.
+@return	TRUE if block is close to MRU end of LRU */
+UNIV_INLINE
+ibool
+buf_page_peek_if_young(
+/*===================*/
+	const buf_page_t*	bpage);	/*!< in: block */
+/********************************************************************//**
 Recommends a move of a block to the start of the LRU list if there is danger
 of dropping from the buffer pool. NOTE: does not reserve the buffer pool
 mutex.
@@ -466,6 +490,31 @@ buf_block_get_modify_clock(
 #else /* !UNIV_HOTBACKUP */
 # define buf_block_modify_clock_inc(block) ((void) 0)
 #endif /* !UNIV_HOTBACKUP */
+/*******************************************************************//**
+Increments the bufferfix count. */
+UNIV_INLINE
+void
+buf_block_buf_fix_inc_func(
+/*=======================*/
+#ifdef UNIV_SYNC_DEBUG
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line */
+#endif /* UNIV_SYNC_DEBUG */
+	buf_block_t*	block)	/*!< in/out: block to bufferfix */
+	__attribute__((nonnull));
+#ifdef UNIV_SYNC_DEBUG
+/** Increments the bufferfix count.
+@param b	in/out: block to bufferfix
+@param f	in: file name where requested
+@param l	in: line number where requested */
+# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(f,l,b)
+#else /* UNIV_SYNC_DEBUG */
+/** Increments the bufferfix count.
+@param b	in/out: block to bufferfix
+@param f	in: file name where requested
+@param l	in: line number where requested */
+# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(b)
+#endif /* UNIV_SYNC_DEBUG */
 /********************************************************************//**
 Calculates a page checksum which is stored to the page when it is written
 to a file. Note that we must be careful to calculate the same value
@@ -986,8 +1035,7 @@ UNIV_INTERN
 void
 buf_page_io_complete(
 /*=================*/
-	buf_page_t*	bpage,	/*!< in: pointer to the block in question */
-	trx_t*		trx);
+	buf_page_t*	bpage);	/*!< in: pointer to the block in question */
 /********************************************************************//**
 Calculates a folded value of a file page address to use in the page hash
 table.
@@ -1301,10 +1349,7 @@ struct buf_block_struct{
 /**********************************************************************//**
 Compute the hash fold value for blocks in buf_pool->zip_hash. */
 /* @{ */
-/* the fold should be relative when srv_buffer_pool_shm_key is enabled */
-#define BUF_POOL_ZIP_FOLD_PTR(ptr) (!srv_buffer_pool_shm_key\
-					?((ulint) (ptr) / UNIV_PAGE_SIZE)\
-					:((ulint) ((byte*)ptr - (byte*)(buf_pool->chunks->blocks->frame)) / UNIV_PAGE_SIZE))
+#define BUF_POOL_ZIP_FOLD_PTR(ptr) ((ulint) (ptr) / UNIV_PAGE_SIZE)
 #define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame)
 #define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
 /* @} */
@@ -1330,6 +1375,8 @@ struct buf_pool_stat_struct{
 	ulint	n_pages_written;/*!< number write operations */
 	ulint	n_pages_created;/*!< number of pages created
 				in the pool with no read */
+	ulint	n_ra_pages_read_rnd;/*!< number of pages read in
+				as part of random read ahead */
 	ulint	n_ra_pages_read;/*!< number of pages read in
 				as part of read ahead */
 	ulint	n_ra_pages_evicted;/*!< number of read ahead
@@ -1453,8 +1500,10 @@ struct buf_pool_struct{
 	frames and buf_page_t descriptors of blocks that exist
 	in the buffer pool only in compressed form. */
 	/* @{ */
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 	UT_LIST_BASE_NODE_T(buf_page_t)	zip_clean;
 					/*!< unmodified compressed pages */
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 	UT_LIST_BASE_NODE_T(buf_page_t) zip_free[BUF_BUDDY_SIZES_MAX];
 					/*!< buddy free lists */
 //#if BUF_BUDDY_HIGH != UNIV_PAGE_SIZE
@@ -1486,8 +1535,8 @@ Use these instead of accessing buf_pool_mutex directly. */
 /** Test if buf_pool_mutex is owned. */
 #define buf_pool_mutex_own() mutex_own(&buf_pool_mutex)
 /** Acquire the buffer pool mutex. */
+/* The latch order of buf_pool_mutex has been changed. */
 #define buf_pool_mutex_enter() do {		\
-	ut_ad(!mutex_own(&buf_pool_zip_mutex));	\
 	mutex_enter(&buf_pool_mutex);		\
 } while (0)
 
diff --git a/storage/xtradb/include/buf0buf.ic b/storage/xtradb/include/buf0buf.ic
index 2cb0d8ef497..a081d6a34c0 100644
--- a/storage/xtradb/include/buf0buf.ic
+++ b/storage/xtradb/include/buf0buf.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2008, Google Inc.
 
 Portions of this file contain modifications contributed and copyrighted by
@@ -62,6 +62,27 @@ buf_block_get_freed_page_clock(
 }
 
 /********************************************************************//**
+Tells if a block is still close enough to the MRU end of the LRU list
+meaning that it is not in danger of getting evicted and also implying
+that it has been accessed recently.
+Note that this is for heuristics only and does not reserve the buffer
+pool mutex.
+@return	TRUE if block is close to MRU end of LRU */
+UNIV_INLINE
+ibool
+buf_page_peek_if_young(
+/*===================*/
+	const buf_page_t*	bpage)	/*!< in: block */
+{
+	/* FIXME: bpage->freed_page_clock is 31 bits; hence the mask below */
+	return((buf_pool->freed_page_clock & ((1UL << 31) - 1))
+	       < ((ulint) bpage->freed_page_clock
+		  + (buf_pool->curr_size
+		     * (BUF_LRU_OLD_RATIO_DIV - buf_LRU_old_ratio)
+		     / (BUF_LRU_OLD_RATIO_DIV * 4))));
+}
+
+/********************************************************************//**
 Recommends a move of a block to the start of the LRU list if there is danger
 of dropping from the buffer pool. NOTE: does not reserve the buffer pool
 mutex.
@@ -89,12 +110,7 @@ buf_page_peek_if_too_old(
 		buf_pool->stat.n_pages_not_made_young++;
 		return(FALSE);
 	} else {
-		/* FIXME: bpage->freed_page_clock is 31 bits */
-		return((buf_pool->freed_page_clock & ((1UL << 31) - 1))
-		       > ((ulint) bpage->freed_page_clock
-			  + (buf_pool->curr_size
-			     * (BUF_LRU_OLD_RATIO_DIV - buf_LRU_old_ratio)
-			     / (BUF_LRU_OLD_RATIO_DIV * 4))));
+		return(!buf_page_peek_if_young(bpage));
 	}
 }
 
@@ -754,6 +770,35 @@ buf_block_get_lock_hash_val(
 }
 
 /********************************************************************//**
+Allocates a buf_page_t descriptor with ut_malloc(). This function must
+succeed: no NULL check is made here (presumably ut_malloc() asserts).
+@return	the allocated descriptor */
+UNIV_INLINE
+buf_page_t*
+buf_page_alloc_descriptor(void)
+/*===========================*/
+{
+	buf_page_t*	bpage;
+
+	bpage = (buf_page_t*) ut_malloc(sizeof *bpage);
+	ut_d(memset(bpage, 0, sizeof *bpage));
+	UNIV_MEM_ALLOC(bpage, sizeof *bpage);
+
+	return(bpage);
+}
+
+/********************************************************************//**
+Frees a buf_page_t descriptor by handing it to ut_free(). */
+UNIV_INLINE
+void
+buf_page_free_descriptor(
+/*=====================*/
+	buf_page_t*	bpage)	/*!< in: bpage descriptor to free */
+{
+	ut_free(bpage);
+}
+
+/********************************************************************//**
 Allocates a buffer block.
 @return	own: the allocated block, in state BUF_BLOCK_MEMORY */
 UNIV_INLINE
@@ -910,19 +955,6 @@ buf_block_buf_fix_inc_func(
 
 	block->page.buf_fix_count++;
 }
-#ifdef UNIV_SYNC_DEBUG
-/** Increments the bufferfix count.
-@param b	in/out: block to bufferfix
-@param f	in: file name where requested
-@param l	in: line number where requested */
-# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(f,l,b)
-#else /* UNIV_SYNC_DEBUG */
-/** Increments the bufferfix count.
-@param b	in/out: block to bufferfix
-@param f	in: file name where requested
-@param l	in: line number where requested */
-# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(b)
-#endif /* UNIV_SYNC_DEBUG */
 
 /*******************************************************************//**
 Decrements the bufferfix count. */
@@ -1119,7 +1151,7 @@ buf_block_dbg_add_level(
 				where we have acquired latch */
 	ulint		level)	/*!< in: latching order level */
 {
-	sync_thread_add_level(&block->lock, level);
+	sync_thread_add_level(&block->lock, level, FALSE);
 }
 #endif /* UNIV_SYNC_DEBUG */
 #endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/include/buf0lru.h b/storage/xtradb/include/buf0lru.h
index fe7c067dfb7..8abebfb675c 100644
--- a/storage/xtradb/include/buf0lru.h
+++ b/storage/xtradb/include/buf0lru.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -30,18 +30,6 @@ Created 11/5/1995 Heikki Tuuri
 #include "ut0byte.h"
 #include "buf0types.h"
 
-/** The return type of buf_LRU_free_block() */
-enum buf_lru_free_block_status {
-	/** freed */
-	BUF_LRU_FREED = 0,
-	/** not freed because the caller asked to remove the
-	uncompressed frame but the control block cannot be
-	relocated */
-	BUF_LRU_CANNOT_RELOCATE,
-	/** not freed because of some other reason */
-	BUF_LRU_NOT_FREED
-};
-
 /******************************************************************//**
 Tries to remove LRU flushed blocks from the end of the LRU list and put them
 to the free list. This is beneficial for the efficiency of the insert buffer
@@ -91,6 +79,7 @@ void
 buf_LRU_mark_space_was_deleted(
 /*===========================*/
 	ulint	id);	/*!< in: space id */
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 /********************************************************************//**
 Insert a compressed block into buf_pool->zip_clean in the LRU order. */
 UNIV_INTERN
@@ -98,22 +87,22 @@ void
 buf_LRU_insert_zip_clean(
 /*=====================*/
 	buf_page_t*	bpage);	/*!< in: pointer to the block in question */
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
 /******************************************************************//**
 Try to free a block.  If bpage is a descriptor of a compressed-only
 page, the descriptor object will be freed as well.
 
-NOTE: If this function returns BUF_LRU_FREED, it will temporarily
+NOTE: If this function returns TRUE, it will temporarily
 release buf_pool_mutex.  Furthermore, the page frame will no longer be
 accessible via bpage.
 
 The caller must hold buf_pool_mutex and buf_page_get_mutex(bpage) and
 release these two mutexes after the call.  No other
 buf_page_get_mutex() may be held when calling this function.
-@return BUF_LRU_FREED if freed, BUF_LRU_CANNOT_RELOCATE or
-BUF_LRU_NOT_FREED otherwise. */
+@return TRUE if freed, FALSE otherwise. */
 UNIV_INTERN
-enum buf_lru_free_block_status
+ibool
 buf_LRU_free_block(
 /*===============*/
 	buf_page_t*	bpage,	/*!< in: block to be freed */
diff --git a/storage/xtradb/include/buf0types.h b/storage/xtradb/include/buf0types.h
index ce3e5ecc9c5..9dd847bdaca 100644
--- a/storage/xtradb/include/buf0types.h
+++ b/storage/xtradb/include/buf0types.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -26,6 +26,8 @@ Created 11/17/1995 Heikki Tuuri
 #ifndef buf0types_h
 #define buf0types_h
 
+#include "page0types.h"
+
 /** Buffer page (uncompressed or compressed) */
 typedef	struct buf_page_struct		buf_page_t;
 /** Buffer block for which an uncompressed page exists */
@@ -58,17 +60,10 @@ enum buf_io_fix {
 
 /** Parameters of binary buddy system for compressed pages (buf0buddy.h) */
 /* @{ */
-#if UNIV_WORD_SIZE <= 4 /* 32-bit system */
-/** Base-2 logarithm of the smallest buddy block size */
-# define BUF_BUDDY_LOW_SHIFT	6
-#else /* 64-bit system */
-/** Base-2 logarithm of the smallest buddy block size */
-# define BUF_BUDDY_LOW_SHIFT	7
-#endif
+#define BUF_BUDDY_LOW_SHIFT	PAGE_ZIP_MIN_SIZE_SHIFT
+
 #define BUF_BUDDY_LOW		(1 << BUF_BUDDY_LOW_SHIFT)
-					/*!< minimum block size in the binary
-					buddy system; must be at least
-					sizeof(buf_page_t) */
+
 #define BUF_BUDDY_SIZES		(UNIV_PAGE_SIZE_SHIFT - BUF_BUDDY_LOW_SHIFT)
 #define BUF_BUDDY_SIZES_MAX	(UNIV_PAGE_SIZE_SHIFT_MAX - BUF_BUDDY_LOW_SHIFT)
 					/*!< number of buddy sizes */
diff --git a/storage/xtradb/include/dict0mem.h b/storage/xtradb/include/dict0mem.h
index f47293bedf6..3554274847c 100644
--- a/storage/xtradb/include/dict0mem.h
+++ b/storage/xtradb/include/dict0mem.h
@@ -340,6 +340,13 @@ struct dict_index_struct{
 				index, or 0 if the index existed
 				when InnoDB was started up */
 #endif /* !UNIV_HOTBACKUP */
+#ifdef UNIV_BLOB_DEBUG
+	mutex_t		blobs_mutex;
+				/*!< mutex protecting blobs */
+	void*		blobs;	/*!< map of (page_no,heap_no,field_no)
+				to first_blob_page_no; protected by
+				blobs_mutex; @see btr_blob_dbg_t */
+#endif /* UNIV_BLOB_DEBUG */
 #ifdef UNIV_DEBUG
 	ulint		magic_n;/*!< magic number */
 /** Value of dict_index_struct::magic_n */
diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h
index fbf8ca20db3..11c4cb4ba03 100644
--- a/storage/xtradb/include/fil0fil.h
+++ b/storage/xtradb/include/fil0fil.h
@@ -670,8 +670,9 @@ UNIV_INTERN
 void
 fil_flush(
 /*======*/
-	ulint	space_id);	/*!< in: file space id (this can be a group of
+	ulint	space_id,	/*!< in: file space id (this can be a group of
 				log files or a tablespace of the database) */
+	ibool	metadata);
 /**********************************************************************//**
 Flushes to disk writes in file spaces of the given type possibly cached by
 the OS. */
diff --git a/storage/xtradb/include/hash0hash.h b/storage/xtradb/include/hash0hash.h
index 492c767acc4..b17c21a45ef 100644
--- a/storage/xtradb/include/hash0hash.h
+++ b/storage/xtradb/include/hash0hash.h
@@ -49,28 +49,6 @@ hash_table_t*
 hash_create(
 /*========*/
 	ulint	n);	/*!< in: number of array cells */
-
-/*************************************************************//**
-*/
-UNIV_INTERN
-ulint
-hash_create_needed(
-/*===============*/
-	ulint	n);
-
-UNIV_INTERN
-void
-hash_create_init(
-/*=============*/
-	hash_table_t*	table,
-	ulint		n);
-
-UNIV_INTERN
-void
-hash_create_reuse(
-/*==============*/
-	hash_table_t*	table);
-
 #ifndef UNIV_HOTBACKUP
 /*************************************************************//**
 Creates a mutex array to protect a hash table. */
@@ -350,33 +328,6 @@ do {\
 	}\
 } while (0)
 
-/********************************************************************//**
-Align nodes with moving location.*/
-#define HASH_OFFSET(TABLE, NODE_TYPE, PTR_NAME, FADDR, FOFFSET, BOFFSET) \
-do {\
-	ulint		i2222;\
-	ulint		cell_count2222;\
-\
-	cell_count2222 = hash_get_n_cells(TABLE);\
-\
-	for (i2222 = 0; i2222 < cell_count2222; i2222++) {\
-		NODE_TYPE*	node2222;\
-\
-		if ((TABLE)->array[i2222].node) \
-			(TABLE)->array[i2222].node = (void*)((byte*)(TABLE)->array[i2222].node \
-			+ (((TABLE)->array[i2222].node > (void*)FADDR)?FOFFSET:BOFFSET));\
-		node2222 = HASH_GET_FIRST((TABLE), i2222);\
-\
-		while (node2222) {\
-			if (node2222->PTR_NAME) \
-				node2222->PTR_NAME = (void*)((byte*)(node2222->PTR_NAME) \
-				+ ((((void*)node2222->PTR_NAME) > (void*)FADDR)?FOFFSET:BOFFSET));\
-\
-			node2222 = node2222->PTR_NAME;\
-		}\
-	}\
-} while (0)
-
 /************************************************************//**
 Gets the mutex index for a fold value in a hash table.
 @return	mutex number */
diff --git a/storage/xtradb/include/mtr0mtr.h b/storage/xtradb/include/mtr0mtr.h
index bc3f1951be9..8a9ec8ea7f0 100644
--- a/storage/xtradb/include/mtr0mtr.h
+++ b/storage/xtradb/include/mtr0mtr.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -213,16 +213,6 @@ ulint
 mtr_set_savepoint(
 /*==============*/
 	mtr_t*	mtr);	/*!< in: mtr */
-/**********************************************************//**
-Releases the latches stored in an mtr memo down to a savepoint.
-NOTE! The mtr must not have made changes to buffer pages after the
-savepoint, as these can be handled only by mtr_commit. */
-UNIV_INTERN
-void
-mtr_rollback_to_savepoint(
-/*======================*/
-	mtr_t*	mtr,		/*!< in: mtr */
-	ulint	savepoint);	/*!< in: savepoint */
 #ifndef UNIV_HOTBACKUP
 /**********************************************************//**
 Releases the (index tree) s-latch stored in an mtr memo after a
diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h
index 46bda4c6b45..5db7dc88b8f 100644
--- a/storage/xtradb/include/os0file.h
+++ b/storage/xtradb/include/os0file.h
@@ -473,7 +473,8 @@ UNIV_INTERN
 ibool
 os_file_flush(
 /*==========*/
-	os_file_t	file);	/*!< in, own: handle to a file */
+	os_file_t	file,	/*!< in, own: handle to a file */
+	ibool		metadata);
 /***********************************************************************//**
 Retrieves the last error number if an error occurs in a file io function.
 The number should be retrieved before any other OS calls (because they may
diff --git a/storage/xtradb/include/os0proc.h b/storage/xtradb/include/os0proc.h
index 582cef6f803..fd46bd7db87 100644
--- a/storage/xtradb/include/os0proc.h
+++ b/storage/xtradb/include/os0proc.h
@@ -32,11 +32,6 @@ Created 9/30/1995 Heikki Tuuri
 #ifdef UNIV_LINUX
 #include <sys/ipc.h>
 #include <sys/shm.h>
-#else
-# if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H
-#include <sys/ipc.h>
-#include <sys/shm.h>
-# endif
 #endif
 
 typedef void*			os_process_t;
@@ -75,29 +70,6 @@ os_mem_free_large(
 	ulint	size);			/*!< in: size returned by
 					os_mem_alloc_large() */
 
-
-/****************************************************************//**
-Allocates or attaches and reuses shared memory segment.
-The content is not cleared automatically.
-@return	allocated memory */
-UNIV_INTERN
-void*
-os_shm_alloc(
-/*=========*/
-	ulint*	n,			/*!< in/out: number of bytes */
-	uint	key,
-	ibool*	is_new);
-
-/****************************************************************//**
-Detach shared memory segment. */
-UNIV_INTERN
-void
-os_shm_free(
-/*========*/
-	void	*ptr,			/*!< in: pointer returned by
-					os_shm_alloc() */
-	ulint	size);			/*!< in: size returned by
-					os_shm_alloc() */
 #ifndef UNIV_NONINL
 #include "os0proc.ic"
 #endif
diff --git a/storage/xtradb/include/page0cur.ic b/storage/xtradb/include/page0cur.ic
index 3520677dfb3..81474fa35f5 100644
--- a/storage/xtradb/include/page0cur.ic
+++ b/storage/xtradb/include/page0cur.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -27,6 +27,8 @@ Created 10/4/1994 Heikki Tuuri
 #include "buf0types.h"
 
 #ifdef UNIV_DEBUG
+# include "rem0cmp.h"
+
 /*********************************************************//**
 Gets pointer to the page frame where the cursor is positioned.
 @return	page */
@@ -268,6 +270,7 @@ page_cur_tuple_insert(
 					      index, rec, offsets, mtr);
 	}
 
+	ut_ad(!rec || !cmp_dtuple_rec(tuple, rec, offsets));
 	mem_heap_free(heap);
 	return(rec);
 }
diff --git a/storage/xtradb/include/page0page.h b/storage/xtradb/include/page0page.h
index 5b2bcf7c054..aeaef030505 100644
--- a/storage/xtradb/include/page0page.h
+++ b/storage/xtradb/include/page0page.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -284,16 +284,42 @@ page_get_supremum_offset(
 	const page_t*	page);	/*!< in: page which must have record(s) */
 #define page_get_infimum_rec(page) ((page) + page_get_infimum_offset(page))
 #define page_get_supremum_rec(page) ((page) + page_get_supremum_offset(page))
+
 /************************************************************//**
-Returns the middle record of record list. If there are an even number
-of records in the list, returns the first record of upper half-list.
-@return	middle record */
+Returns the nth record of the record list.
+This is the inverse function of page_rec_get_n_recs_before().
+@return	nth record */
 UNIV_INTERN
+const rec_t*
+page_rec_get_nth_const(
+/*===================*/
+	const page_t*	page,	/*!< in: page */
+	ulint		nth)	/*!< in: nth record */
+	__attribute__((nonnull, warn_unused_result));
+/************************************************************//**
+Returns the nth record of the record list.
+This is the inverse function of page_rec_get_n_recs_before().
+@return	nth record */
+UNIV_INLINE
+rec_t*
+page_rec_get_nth(
+/*=============*/
+	page_t*	page,	/*!< in: page */
+	ulint	nth)	/*!< in: nth record */
+	__attribute__((nonnull, warn_unused_result));
+
+#ifndef UNIV_HOTBACKUP
+/************************************************************//**
+Returns the middle record of the records on the page. If there is an
+even number of records in the list, returns the first record of the
+upper half-list.
+@return	middle record */
+UNIV_INLINE
 rec_t*
 page_get_middle_rec(
 /*================*/
-	page_t*	page);	/*!< in: page */
-#ifndef UNIV_HOTBACKUP
+	page_t*	page)	/*!< in: page */
+	__attribute__((nonnull, warn_unused_result));
 /*************************************************************//**
 Compares a data tuple to a physical record. Differs from the function
 cmp_dtuple_rec_with_match in the way that the record must reside on an
@@ -348,6 +374,7 @@ page_get_n_recs(
 /***************************************************************//**
 Returns the number of records before the given record in chain.
 The number includes infimum and supremum records.
+This is the inverse function of page_rec_get_nth().
 @return	number of records */
 UNIV_INTERN
 ulint
diff --git a/storage/xtradb/include/page0page.ic b/storage/xtradb/include/page0page.ic
index dab9dc742e4..b34408aed17 100644
--- a/storage/xtradb/include/page0page.ic
+++ b/storage/xtradb/include/page0page.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -423,7 +423,37 @@ page_rec_is_infimum(
 	return(page_rec_is_infimum_low(page_offset(rec)));
 }
 
+/************************************************************//**
+Returns the nth record of the record list; a non-const wrapper around
+page_rec_get_nth_const(). Inverse of page_rec_get_n_recs_before().
+@return	nth record */
+UNIV_INLINE
+rec_t*
+page_rec_get_nth(
+/*=============*/
+	page_t*	page,	/*!< in: page */
+	ulint	nth)	/*!< in: nth record */
+{
+	return((rec_t*) page_rec_get_nth_const(page, nth));
+}
+
 #ifndef UNIV_HOTBACKUP
+/************************************************************//**
+Returns the middle record of the records on the page, looked up with
+page_rec_get_nth(). If there is an even number of records in the list,
+returns the first record of the upper half-list.
+@return	middle record */
+UNIV_INLINE
+rec_t*
+page_get_middle_rec(
+/*================*/
+	page_t*	page)	/*!< in: page */
+{
+	ulint	middle = (page_get_n_recs(page) + PAGE_HEAP_NO_USER_LOW) / 2;
+
+	return(page_rec_get_nth(page, middle));
+}
+
 /*************************************************************//**
 Compares a data tuple to a physical record. Differs from the function
 cmp_dtuple_rec_with_match in the way that the record must reside on an
diff --git a/storage/xtradb/include/page0zip.h b/storage/xtradb/include/page0zip.h
index 4d37302ed20..fe3d2e52e0b 100644
--- a/storage/xtradb/include/page0zip.h
+++ b/storage/xtradb/include/page0zip.h
@@ -420,7 +420,7 @@ page_zip_copy_recs(
 	const page_t*		src,		/*!< in: page */
 	dict_index_t*		index,		/*!< in: index of the B-tree */
 	mtr_t*			mtr)		/*!< in: mini-transaction */
-	__attribute__((nonnull(1,2,3,4)));
+	__attribute__((nonnull));
 #endif /* !UNIV_HOTBACKUP */
 
 /**********************************************************************//**
diff --git a/storage/xtradb/include/rem0rec.h b/storage/xtradb/include/rem0rec.h
index 17d08afabb9..06de23be757 100644
--- a/storage/xtradb/include/rem0rec.h
+++ b/storage/xtradb/include/rem0rec.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -480,6 +480,18 @@ ulint
 rec_offs_any_extern(
 /*================*/
 	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+/******************************************************//**
+Determine if the offsets are for a record containing null BLOB pointers.
+@return	first field containing a null BLOB pointer, or NULL if none found */
+UNIV_INLINE
+const byte*
+rec_offs_any_null_extern(
+/*=====================*/
+	const rec_t*	rec,		/*!< in: record */
+	const ulint*	offsets)	/*!< in: rec_get_offsets(rec) */
+	__attribute__((nonnull, warn_unused_result));
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
 /******************************************************//**
 Returns nonzero if the extern bit is set in nth field of rec.
 @return	nonzero if externally stored */
diff --git a/storage/xtradb/include/rem0rec.ic b/storage/xtradb/include/rem0rec.ic
index fa96c97f95e..3875a4cf814 100644
--- a/storage/xtradb/include/rem0rec.ic
+++ b/storage/xtradb/include/rem0rec.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -26,6 +26,7 @@ Created 5/30/1994 Heikki Tuuri
 #include "mach0data.h"
 #include "ut0byte.h"
 #include "dict0dict.h"
+#include "btr0types.h"
 
 /* Compact flag ORed to the extra size returned by rec_get_offsets() */
 #define REC_OFFS_COMPACT	((ulint) 1 << 31)
@@ -1087,6 +1088,44 @@ rec_offs_any_extern(
 	return(UNIV_UNLIKELY(*rec_offs_base(offsets) & REC_OFFS_EXTERNAL));
 }
 
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+/******************************************************//**
+Find the first externally stored field whose BLOB pointer is null.
+@return	start of the first field with a null BLOB pointer, or NULL if none */
+UNIV_INLINE
+const byte*
+rec_offs_any_null_extern(
+/*=====================*/
+	const rec_t*	rec,		/*!< in: record */
+	const ulint*	offsets)	/*!< in: rec_get_offsets(rec) */
+{
+	ulint	i;
+	ut_ad(rec_offs_validate(rec, NULL, offsets));
+
+	if (!rec_offs_any_extern(offsets)) {
+		return(NULL);
+	}
+	/* Check the trailing BLOB reference of each external field. */
+	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
+		if (rec_offs_nth_extern(offsets, i)) {
+			ulint		len;
+			const byte*	field
+				= rec_get_nth_field(rec, offsets, i, &len);
+			/* The reference is the field's last bytes. */
+			ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
+			if (!memcmp(field + len
+				    - BTR_EXTERN_FIELD_REF_SIZE,
+				    field_ref_zero,
+				    BTR_EXTERN_FIELD_REF_SIZE)) {
+				return(field);
+			}
+		}
+	}
+
+	return(NULL);
+}
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
+
 /******************************************************//**
 Returns nonzero if the extern bit is set in nth field of rec.
 @return	nonzero if externally stored */
diff --git a/storage/xtradb/include/row0row.h b/storage/xtradb/include/row0row.h
index 723b7b53395..36fb26482ce 100644
--- a/storage/xtradb/include/row0row.h
+++ b/storage/xtradb/include/row0row.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -38,16 +38,16 @@ Created 4/20/1996 Heikki Tuuri
 #include "btr0types.h"
 
 /*********************************************************************//**
-Gets the offset of the trx id field, in bytes relative to the origin of
+Gets the offset of the DB_TRX_ID field, in bytes relative to the origin of
 a clustered index record.
 @return	offset of DATA_TRX_ID */
-UNIV_INTERN
+UNIV_INLINE
 ulint
 row_get_trx_id_offset(
 /*==================*/
-	const rec_t*	rec,	/*!< in: record */
-	dict_index_t*	index,	/*!< in: clustered index */
-	const ulint*	offsets);/*!< in: rec_get_offsets(rec, index) */
+	const dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*		offsets)/*!< in: record offsets */
+	__attribute__((nonnull, warn_unused_result));
 /*********************************************************************//**
 Reads the trx id field from a clustered index record.
 @return	value of the field */
@@ -55,9 +55,10 @@ UNIV_INLINE
 trx_id_t
 row_get_rec_trx_id(
 /*===============*/
-	const rec_t*	rec,	/*!< in: record */
-	dict_index_t*	index,	/*!< in: clustered index */
-	const ulint*	offsets);/*!< in: rec_get_offsets(rec, index) */
+	const rec_t*		rec,	/*!< in: record */
+	const dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*		offsets)/*!< in: rec_get_offsets(rec, index) */
+	__attribute__((nonnull, warn_unused_result));
 /*********************************************************************//**
 Reads the roll pointer field from a clustered index record.
 @return	value of the field */
@@ -65,9 +66,10 @@ UNIV_INLINE
 roll_ptr_t
 row_get_rec_roll_ptr(
 /*=================*/
-	const rec_t*	rec,	/*!< in: record */
-	dict_index_t*	index,	/*!< in: clustered index */
-	const ulint*	offsets);/*!< in: rec_get_offsets(rec, index) */
+	const rec_t*		rec,	/*!< in: record */
+	const dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*		offsets)/*!< in: rec_get_offsets(rec, index) */
+	__attribute__((nonnull, warn_unused_result));
 /*****************************************************************//**
 When an insert or purge to a table is performed, this function builds
 the entry to be inserted into or purged from an index on the table.
diff --git a/storage/xtradb/include/row0row.ic b/storage/xtradb/include/row0row.ic
index 05c007641af..0b9ca982af8 100644
--- a/storage/xtradb/include/row0row.ic
+++ b/storage/xtradb/include/row0row.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -28,15 +28,42 @@ Created 4/20/1996 Heikki Tuuri
 #include "trx0undo.h"
 
 /*********************************************************************//**
+Gets the offset of the DB_TRX_ID field, in bytes relative to the origin of
+a clustered index record; callers use it when index->trx_id_offset is 0.
+@return	offset of the DATA_TRX_ID field, as recorded in offsets */
+UNIV_INLINE
+ulint
+row_get_trx_id_offset(
+/*==================*/
+	const dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*		offsets)/*!< in: record offsets */
+{
+	ulint	pos;
+	ulint	offset;
+	ulint	len;
+
+	ut_ad(dict_index_is_clust(index));
+	ut_ad(rec_offs_validate(NULL, index, offsets));
+	/* Find the field position of DATA_TRX_ID, then its byte offset. */
+	pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
+
+	offset = rec_get_nth_field_offs(offsets, pos, &len);
+
+	ut_ad(len == DATA_TRX_ID_LEN);
+
+	return(offset);
+}
+
+/*********************************************************************//**
 Reads the trx id field from a clustered index record.
 @return	value of the field */
 UNIV_INLINE
 trx_id_t
 row_get_rec_trx_id(
 /*===============*/
-	const rec_t*	rec,	/*!< in: record */
-	dict_index_t*	index,	/*!< in: clustered index */
-	const ulint*	offsets)/*!< in: rec_get_offsets(rec, index) */
+	const rec_t*		rec,	/*!< in: record */
+	const dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*		offsets)/*!< in: rec_get_offsets(rec, index) */
 {
 	ulint	offset;
 
@@ -46,7 +73,7 @@ row_get_rec_trx_id(
 	offset = index->trx_id_offset;
 
 	if (!offset) {
-		offset = row_get_trx_id_offset(rec, index, offsets);
+		offset = row_get_trx_id_offset(index, offsets);
 	}
 
 	return(trx_read_trx_id(rec + offset));
@@ -59,9 +86,9 @@ UNIV_INLINE
 roll_ptr_t
 row_get_rec_roll_ptr(
 /*=================*/
-	const rec_t*	rec,	/*!< in: record */
-	dict_index_t*	index,	/*!< in: clustered index */
-	const ulint*	offsets)/*!< in: rec_get_offsets(rec, index) */
+	const rec_t*		rec,	/*!< in: record */
+	const dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*		offsets)/*!< in: rec_get_offsets(rec, index) */
 {
 	ulint	offset;
 
@@ -71,7 +98,7 @@ row_get_rec_roll_ptr(
 	offset = index->trx_id_offset;
 
 	if (!offset) {
-		offset = row_get_trx_id_offset(rec, index, offsets);
+		offset = row_get_trx_id_offset(index, offsets);
 	}
 
 	return(trx_read_roll_ptr(rec + offset + DATA_TRX_ID_LEN));
diff --git a/storage/xtradb/include/row0upd.ic b/storage/xtradb/include/row0upd.ic
index 18e22f1eca9..0894ed373b0 100644
--- a/storage/xtradb/include/row0upd.ic
+++ b/storage/xtradb/include/row0upd.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -171,7 +171,7 @@ row_upd_rec_sys_fields(
 		ulint	offset = index->trx_id_offset;
 
 		if (!offset) {
-			offset = row_get_trx_id_offset(rec, index, offsets);
+			offset = row_get_trx_id_offset(index, offsets);
 		}
 
 #if DATA_TRX_ID + 1 != DATA_ROLL_PTR
diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h
index 29d88331532..49be9a2ec3a 100644
--- a/storage/xtradb/include/srv0srv.h
+++ b/storage/xtradb/include/srv0srv.h
@@ -161,13 +161,10 @@ extern ulint	srv_buf_pool_curr_size;	/*!< current size in bytes */
 extern ulint	srv_mem_pool_size;
 extern ulint	srv_lock_table_size;
 
-extern uint	srv_buffer_pool_shm_key;
-extern ibool	srv_buffer_pool_shm_is_reused;
-extern ibool	srv_buffer_pool_shm_checksum;
-
 extern ibool	srv_thread_concurrency_timer_based;
 
 extern ulint	srv_n_file_io_threads;
+extern my_bool	srv_random_read_ahead;
 extern ulong	srv_read_ahead_threshold;
 extern ulint	srv_n_read_io_threads;
 extern ulint	srv_n_write_io_threads;
@@ -290,6 +287,7 @@ extern	ibool	srv_print_latch_waits;
 extern ulint	srv_activity_count;
 extern ulint	srv_fatal_semaphore_wait_threshold;
 extern ulint	srv_dml_needed_delay;
+extern long long	srv_kill_idle_transaction;
 
 extern mutex_t*	kernel_mutex_temp;/* mutex protecting the server, trx structs,
 				query threads, and lock table: we allocate
@@ -353,6 +351,9 @@ extern ulint srv_buf_pool_reads;
 /** Time in seconds between automatic buffer pool dumps */
 extern uint srv_auto_lru_dump;
 
+/** Whether startup should be blocked until buffer pool is fully restored */
+extern ibool srv_blocking_lru_restore;
+
 /** Status variables to be passed to MySQL */
 typedef struct export_var_struct export_struc;
 
@@ -696,6 +697,7 @@ struct export_var_struct{
 	ulint innodb_buffer_pool_wait_free;	/*!< srv_buf_pool_wait_free */
 	ulint innodb_buffer_pool_pages_flushed;	/*!< srv_buf_pool_flushed */
 	ulint innodb_buffer_pool_write_requests;/*!< srv_buf_pool_write_requests */
+	ulint innodb_buffer_pool_read_ahead_rnd;/*!< srv_read_ahead_rnd */
 	ulint innodb_buffer_pool_read_ahead;	/*!< srv_read_ahead */
 	ulint innodb_buffer_pool_read_ahead_evicted;/*!< srv_read_ahead evicted*/
         ulint innodb_deadlocks;                 /* ??? */
diff --git a/storage/xtradb/include/sync0arr.h b/storage/xtradb/include/sync0arr.h
index 5f1280f5e28..6e931346238 100644
--- a/storage/xtradb/include/sync0arr.h
+++ b/storage/xtradb/include/sync0arr.h
@@ -115,8 +115,11 @@ Prints warnings of long semaphore waits to stderr.
 @return	TRUE if fatal semaphore wait threshold was exceeded */
 UNIV_INTERN
 ibool
-sync_array_print_long_waits(void);
-/*=============================*/
+sync_array_print_long_waits(
+/*========================*/
+	os_thread_id_t*	waiter,	/*!< out: longest waiting thread */
+	const void**	sema)	/*!< out: longest-waited-for semaphore */
+	__attribute__((nonnull));
 /********************************************************************//**
 Validates the integrity of the wait array. Checks
 that the number of reserved cells equals the count variable. */
diff --git a/storage/xtradb/include/sync0rw.ic b/storage/xtradb/include/sync0rw.ic
index 7116f1b7c9b..485a63a1b18 100644
--- a/storage/xtradb/include/sync0rw.ic
+++ b/storage/xtradb/include/sync0rw.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2008, Google Inc.
 
 Portions of this file contain modifications contributed and copyrighted by
@@ -603,16 +603,16 @@ rw_lock_x_unlock_direct(
 
 	ut_ad((lock->lock_word % X_LOCK_DECR) == 0);
 
-#ifdef UNIV_SYNC_DEBUG
-	rw_lock_remove_debug_info(lock, 0, RW_LOCK_EX);
-#endif
-
 	if (lock->lock_word == 0) {
 		lock->recursive = FALSE;
 		UNIV_MEM_INVALID(&lock->writer_thread,
 				 sizeof lock->writer_thread);
 	}
 
+#ifdef UNIV_SYNC_DEBUG
+	rw_lock_remove_debug_info(lock, 0, RW_LOCK_EX);
+#endif
+
 	lock->lock_word += X_LOCK_DECR;
 
 	ut_ad(!lock->waiters);
diff --git a/storage/xtradb/include/sync0sync.h b/storage/xtradb/include/sync0sync.h
index 6aaab1cc7d7..90def8efa38 100644
--- a/storage/xtradb/include/sync0sync.h
+++ b/storage/xtradb/include/sync0sync.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2008, Google Inc.
 
 Portions of this file contain modifications contributed and copyrighted by
@@ -219,8 +219,10 @@ void
 sync_thread_add_level(
 /*==================*/
 	void*	latch,	/*!< in: pointer to a mutex or an rw-lock */
-	ulint	level);	/*!< in: level in the latching order; if
+	ulint	level,	/*!< in: level in the latching order; if
 			SYNC_LEVEL_VARYING, nothing is done */
+	ibool	relock)	/*!< in: TRUE if re-entering an x-lock */
+	__attribute__((nonnull));
 /******************************************************************//**
 Removes a latch from the thread level array if it is found there.
 @return TRUE if found in the array; it is no error if the latch is
diff --git a/storage/xtradb/include/trx0sys.h b/storage/xtradb/include/trx0sys.h
index 2637189f37e..eafa1ab6409 100644
--- a/storage/xtradb/include/trx0sys.h
+++ b/storage/xtradb/include/trx0sys.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -317,6 +317,17 @@ ibool
 trx_in_trx_list(
 /*============*/
 	trx_t*	in_trx);/*!< in: trx */
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+/***********************************************************//**
+Assert that a transaction with the given id exists and is marked recovered.
+@return TRUE (both checks are ut_a() assertions; FALSE is never returned) */
+UNIV_INLINE
+ibool
+trx_assert_recovered(
+/*=================*/
+	trx_id_t	trx_id)		/*!< in: transaction identifier */
+	__attribute__((warn_unused_result));
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
 /*****************************************************************//**
 Updates the offset information about the end of the MySQL binlog entry
 which corresponds to the transaction just being committed. In a MySQL
diff --git a/storage/xtradb/include/trx0sys.ic b/storage/xtradb/include/trx0sys.ic
index 5e0f07c8b9d..234fc0b92e9 100644
--- a/storage/xtradb/include/trx0sys.ic
+++ b/storage/xtradb/include/trx0sys.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -311,6 +311,28 @@ trx_get_on_id(
 	return(NULL);
 }
 
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+/***********************************************************//**
+Assert that a transaction with the given id exists and is marked recovered.
+@return TRUE (both checks are ut_a() assertions; FALSE is never returned) */
+UNIV_INLINE
+ibool
+trx_assert_recovered(
+/*=================*/
+	trx_id_t	trx_id)		/*!< in: transaction identifier */
+{
+	trx_t*		trx;
+
+	mutex_enter(&kernel_mutex);	/* held for the lookup and both checks */
+	trx = trx_get_on_id(trx_id);
+	ut_a(trx);
+	ut_a(trx->is_recovered);
+	mutex_exit(&kernel_mutex);
+
+	return(TRUE);
+}
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
+
 /****************************************************************//**
 Returns the minumum trx id in trx list. This is the smallest id for which
 the trx can possibly be active. (But, you must look at the trx->conc_state to
diff --git a/storage/xtradb/include/trx0trx.h b/storage/xtradb/include/trx0trx.h
index 8858fe2fafa..6a9fc324ff9 100644
--- a/storage/xtradb/include/trx0trx.h
+++ b/storage/xtradb/include/trx0trx.h
@@ -44,6 +44,9 @@ extern sess_t*	trx_dummy_sess;
 /** Number of transactions currently allocated for MySQL: protected by
 the kernel mutex */
 extern ulint	trx_n_mysql_transactions;
+/** Number of transactions currently in the XA PREPARED state: protected by
+the kernel mutex */
+extern ulint	trx_n_prepared;
 
 /********************************************************************//**
 Releases the search latch if trx has reserved it. */
@@ -108,6 +111,14 @@ trx_free(
 /*=====*/
 	trx_t*	trx);	/*!< in, own: trx object */
 /********************************************************************//**
+At shutdown, frees a transaction object that is in the PREPARED state. */
+UNIV_INTERN
+void
+trx_free_prepared(
+/*==============*/
+	trx_t*	trx)	/*!< in, own: trx object */
+	__attribute__((nonnull));
+/********************************************************************//**
 Frees a transaction object for MySQL. */
 UNIV_INTERN
 void
@@ -498,6 +509,7 @@ struct trx_struct{
 					150 bytes in the undo log size as then
 					we skip XA steps */
 	ulint		flush_log_at_trx_commit_session;
+	ulint		fake_changes;
 	ulint		flush_log_later;/* In 2PC, we hold the
 					prepare_commit mutex across
 					both phases. In that case, we
@@ -590,6 +602,8 @@ struct trx_struct{
 	ulint		mysql_process_no;/* since in Linux, 'top' reports
 					process id's and not thread id's, we
 					store the process number too */
+	time_t		idle_start;
+	ib_int64_t	last_stmt_start;
 	/*------------------------------*/
 	ulint		n_mysql_tables_in_use; /* number of Innobase tables
 					used in the processing of the current
diff --git a/storage/xtradb/include/trx0undo.h b/storage/xtradb/include/trx0undo.h
index a084f2394b5..4f15cd85833 100644
--- a/storage/xtradb/include/trx0undo.h
+++ b/storage/xtradb/include/trx0undo.h
@@ -298,6 +298,15 @@ void
 trx_undo_insert_cleanup(
 /*====================*/
 	trx_t*	trx);	/*!< in: transaction handle */
+
+/********************************************************************//**
+At shutdown, frees the undo logs of a PREPARED transaction. */
+UNIV_INTERN
+void
+trx_undo_free_prepared(
+/*===================*/
+	trx_t*	trx)	/*!< in/out: PREPARED transaction */
+	__attribute__((nonnull));
 #endif /* !UNIV_HOTBACKUP */
 /***********************************************************//**
 Parses the redo log entry of an undo log page initialization.
diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i
index 7b11a16dae9..902d0c94ddd 100644
--- a/storage/xtradb/include/univ.i
+++ b/storage/xtradb/include/univ.i
@@ -1,8 +1,7 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2008, Google Inc.
-Copyright (c) 2009, Sun Microsystems, Inc.
 
 Portions of this file contain modifications contributed and copyrighted by
 Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -46,8 +45,8 @@ Created 1/20/1994 Heikki Tuuri
 
 #define INNODB_VERSION_MAJOR	1
 #define INNODB_VERSION_MINOR	0
-#define INNODB_VERSION_BUGFIX	15
-#define PERCONA_INNODB_VERSION 12.7
+#define INNODB_VERSION_BUGFIX	17
+#define PERCONA_INNODB_VERSION 13.0
 
 /* The following is the InnoDB version as shown in
 SELECT plugin_version FROM information_schema.plugins;
@@ -197,6 +196,8 @@ this will break redo log file compatibility, but it may be useful when
 debugging redo log application problems. */
 #define UNIV_MEM_DEBUG				/* detect memory leaks etc */
 #define UNIV_IBUF_DEBUG				/* debug the insert buffer */
+#define UNIV_BLOB_DEBUG				/* track BLOB ownership;
+assumes that no BLOBs survive server restart */
 #define UNIV_IBUF_COUNT_DEBUG			/* debug the insert buffer;
 this limits the database to IBUF_COUNT_N_SPACES and IBUF_COUNT_N_PAGES,
 and the insert buffer must be empty when the database is started */
diff --git a/storage/xtradb/include/ut0lst.h b/storage/xtradb/include/ut0lst.h
index 245dfc226c3..7b15c052978 100644
--- a/storage/xtradb/include/ut0lst.h
+++ b/storage/xtradb/include/ut0lst.h
@@ -257,48 +257,5 @@ do {									\
 	ut_a(ut_list_node_313 == NULL);					\
 } while (0)
 
-/********************************************************************//**
-Align nodes with moving location.
-@param NAME		the name of the list
-@param TYPE		node type
-@param BASE		base node (not a pointer to it)
-@param OFFSET		offset moved */
-#define UT_LIST_OFFSET(NAME, TYPE, BASE, FADDR, FOFFSET, BOFFSET)	\
-do {									\
-	ulint	ut_list_i_313;						\
-	TYPE*	ut_list_node_313;					\
-									\
-	if ((BASE).start)						\
-		(BASE).start = (void*)((byte*)((BASE).start)			\
-			+ (((void*)((BASE).start) > (void*)FADDR)?FOFFSET:BOFFSET));\
-	if ((BASE).end)							\
-		(BASE).end   = (void*)((byte*)((BASE).end)			\
-			+ (((void*)((BASE).end) > (void*)FADDR)?FOFFSET:BOFFSET));\
-									\
-	ut_list_node_313 = (BASE).start;				\
-									\
-	for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) {		\
-		ut_a(ut_list_node_313);					\
-		if ((ut_list_node_313->NAME).prev)			\
-			(ut_list_node_313->NAME).prev = (void*)((byte*)((ut_list_node_313->NAME).prev)\
-				+ (((void*)((ut_list_node_313->NAME).prev) > (void*)FADDR)?FOFFSET:BOFFSET));\
-		if ((ut_list_node_313->NAME).next)			\
-			(ut_list_node_313->NAME).next =	(void*)((byte*)((ut_list_node_313->NAME).next)\
-				+ (((void*)((ut_list_node_313->NAME).next)> (void*)FADDR)?FOFFSET:BOFFSET));\
-		ut_list_node_313 = (ut_list_node_313->NAME).next;	\
-	}								\
-									\
-	ut_a(ut_list_node_313 == NULL);					\
-									\
-	ut_list_node_313 = (BASE).end;					\
-									\
-	for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) {		\
-		ut_a(ut_list_node_313);					\
-		ut_list_node_313 = (ut_list_node_313->NAME).prev;	\
-	}								\
-									\
-	ut_a(ut_list_node_313 == NULL);					\
-} while (0)
-
 #endif
 
diff --git a/storage/xtradb/include/ut0mem.h b/storage/xtradb/include/ut0mem.h
index f14606be966..9c6ee9049ec 100644
--- a/storage/xtradb/include/ut0mem.h
+++ b/storage/xtradb/include/ut0mem.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -210,43 +210,6 @@ ut_strlcpy_rev(
 	ulint		size);	/*!< in: size of destination buffer */
 
 /**********************************************************************//**
-Compute strlen(ut_strcpyq(str, q)).
-@return	length of the string when quoted */
-UNIV_INLINE
-ulint
-ut_strlenq(
-/*=======*/
-	const char*	str,	/*!< in: null-terminated string */
-	char		q);	/*!< in: the quote character */
-
-/**********************************************************************//**
-Make a quoted copy of a NUL-terminated string.	Leading and trailing
-quotes will not be included; only embedded quotes will be escaped.
-See also ut_strlenq() and ut_memcpyq().
-@return	pointer to end of dest */
-UNIV_INTERN
-char*
-ut_strcpyq(
-/*=======*/
-	char*		dest,	/*!< in: output buffer */
-	char		q,	/*!< in: the quote character */
-	const char*	src);	/*!< in: null-terminated string */
-
-/**********************************************************************//**
-Make a quoted copy of a fixed-length string.  Leading and trailing
-quotes will not be included; only embedded quotes will be escaped.
-See also ut_strlenq() and ut_strcpyq().
-@return	pointer to end of dest */
-UNIV_INTERN
-char*
-ut_memcpyq(
-/*=======*/
-	char*		dest,	/*!< in: output buffer */
-	char		q,	/*!< in: the quote character */
-	const char*	src,	/*!< in: string to be quoted */
-	ulint		len);	/*!< in: length of src */
-
-/**********************************************************************//**
 Return the number of times s2 occurs in s1. Overlapping instances of s2
 are only counted once.
 @return	the number of times s2 occurs in s1 */
diff --git a/storage/xtradb/include/ut0mem.ic b/storage/xtradb/include/ut0mem.ic
index f36c28f1989..c06e2b3ae81 100644
--- a/storage/xtradb/include/ut0mem.ic
+++ b/storage/xtradb/include/ut0mem.ic
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -99,27 +99,6 @@ ut_strcmp(const char* str1, const char* str2)
 }
 
 /**********************************************************************//**
-Compute strlen(ut_strcpyq(str, q)).
-@return	length of the string when quoted */
-UNIV_INLINE
-ulint
-ut_strlenq(
-/*=======*/
-	const char*	str,	/*!< in: null-terminated string */
-	char		q)	/*!< in: the quote character */
-{
-	ulint len;
-
-	for (len = 0; *str; len++, str++) {
-		if (*str == q) {
-			len++;
-		}
-	}
-
-	return(len);
-}
-
-/**********************************************************************//**
 Converts a raw binary data to a NUL-terminated hex string. The output is
 truncated if there is not enough space in "hex", make sure "hex_size" is at
 least (2 * raw_size + 1) if you do not want this to happen. Returns the
diff --git a/storage/xtradb/lock/lock0lock.c b/storage/xtradb/lock/lock0lock.c
index 4fcb5b2c522..e5da4f46ec9 100644
--- a/storage/xtradb/lock/lock0lock.c
+++ b/storage/xtradb/lock/lock0lock.c
@@ -3908,6 +3908,10 @@ lock_table(
 
 	trx = thr_get_trx(thr);
 
+	if (trx->fake_changes && mode == LOCK_IX) {
+		mode = LOCK_IS;
+	}
+
 	lock_mutex_enter_kernel();
 
 	/* Look for stronger locks the same trx already has on the table */
@@ -5109,6 +5113,11 @@ lock_rec_insert_check_and_lock(
 	}
 
 	trx = thr_get_trx(thr);
+
+	if (trx->fake_changes) {
+		return(DB_SUCCESS);
+	}
+
 	next_rec = page_rec_get_next_const(rec);
 	next_rec_heap_no = page_rec_get_heap_no(next_rec);
 
@@ -5277,6 +5286,10 @@ lock_clust_rec_modify_check_and_lock(
 		return(DB_SUCCESS);
 	}
 
+	if (thr && thr_get_trx(thr)->fake_changes) {
+		return(DB_SUCCESS);
+	}
+
 	heap_no = rec_offs_comp(offsets)
 		? rec_get_heap_no_new(rec)
 		: rec_get_heap_no_old(rec);
@@ -5335,6 +5348,10 @@ lock_sec_rec_modify_check_and_lock(
 		return(DB_SUCCESS);
 	}
 
+	if (thr && thr_get_trx(thr)->fake_changes) {
+		return(DB_SUCCESS);
+	}
+
 	heap_no = page_rec_get_heap_no(rec);
 
 	/* Another transaction cannot have an implicit lock on the record,
@@ -5422,6 +5439,10 @@ lock_sec_rec_read_check_and_lock(
 		return(DB_SUCCESS);
 	}
 
+	if (thr && thr_get_trx(thr)->fake_changes && mode == LOCK_X) {
+		mode = LOCK_S;
+	}
+
 	heap_no = page_rec_get_heap_no(rec);
 
 	lock_mutex_enter_kernel();
@@ -5499,6 +5520,10 @@ lock_clust_rec_read_check_and_lock(
 		return(DB_SUCCESS);
 	}
 
+	if (thr && thr_get_trx(thr)->fake_changes && mode == LOCK_X) {
+		mode = LOCK_S;
+	}
+
 	heap_no = page_rec_get_heap_no(rec);
 
 	lock_mutex_enter_kernel();
diff --git a/storage/xtradb/log/log0log.c b/storage/xtradb/log/log0log.c
index 82c6c3da23e..7d394504f46 100644
--- a/storage/xtradb/log/log0log.c
+++ b/storage/xtradb/log/log0log.c
@@ -1116,7 +1116,7 @@ log_io_complete(
 		    && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT
 		    && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
 
-			fil_flush(group->space_id);
+			fil_flush(group->space_id, FALSE);
 		}
 
 #ifdef UNIV_DEBUG
@@ -1139,7 +1139,7 @@ log_io_complete(
 	    && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
 	    && srv_flush_log_at_trx_commit != 2) {
 
-		fil_flush(group->space_id);
+		fil_flush(group->space_id, FALSE);
 	}
 
 	mutex_enter(&(log_sys->mutex));
@@ -1530,7 +1530,7 @@ loop:
 
 		group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
-		fil_flush(group->space_id);
+		fil_flush(group->space_id, FALSE);
 		log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
 	}
 
@@ -2706,7 +2706,7 @@ log_io_complete_archive(void)
 
 	mutex_exit(&(log_sys->mutex));
 
-	fil_flush(group->archive_space_id);
+	fil_flush(group->archive_space_id, TRUE);
 
 	mutex_enter(&(log_sys->mutex));
 
@@ -3209,12 +3209,13 @@ loop:
 		goto loop;
 	}
 
-	/* Check that there are no longer transactions. We need this wait even
-	for the 'very fast' shutdown, because the InnoDB layer may have
-	committed or prepared transactions and we don't want to lose them. */
+	/* Check that there are no longer transactions, except for
+	PREPARED ones. We need this wait even for the 'very fast'
+	shutdown, because the InnoDB layer may have committed or
+	prepared transactions and we don't want to lose them. */
 
 	if (trx_n_mysql_transactions > 0
-	    || UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
+	    || UT_LIST_GET_LEN(trx_sys->trx_list) > trx_n_prepared) {
 
 		mutex_exit(&kernel_mutex);
 
diff --git a/storage/xtradb/log/log0recv.c b/storage/xtradb/log/log0recv.c
index 895067c700a..fae7fbd0da0 100644
--- a/storage/xtradb/log/log0recv.c
+++ b/storage/xtradb/log/log0recv.c
@@ -2899,7 +2899,6 @@ recv_init_crash_recovery(void)
 /*==========================*/
 {
 	ut_a(!recv_needed_recovery);
-	ut_a(!srv_buffer_pool_shm_is_reused);
 
 	recv_needed_recovery = TRUE;
 
@@ -3622,7 +3621,7 @@ recv_reset_log_files_for_backup(
 			exit(1);
 		}
 
-		os_file_flush(log_file);
+		os_file_flush(log_file, TRUE);
 		os_file_close(log_file);
 	}
 
@@ -3645,7 +3644,7 @@ recv_reset_log_files_for_backup(
 
 	os_file_write(name, log_file, buf, 0, 0,
 		      LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
-	os_file_flush(log_file);
+	os_file_flush(log_file, TRUE);
 	os_file_close(log_file);
 
 	ut_free(buf);
diff --git a/storage/xtradb/mtr/mtr0mtr.c b/storage/xtradb/mtr/mtr0mtr.c
index 34e6d3ffc92..a9f1c35f84c 100644
--- a/storage/xtradb/mtr/mtr0mtr.c
+++ b/storage/xtradb/mtr/mtr0mtr.c
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -248,40 +248,6 @@ mtr_commit(
 }
 
 #ifndef UNIV_HOTBACKUP
-/**********************************************************//**
-Releases the latches stored in an mtr memo down to a savepoint.
-NOTE! The mtr must not have made changes to buffer pages after the
-savepoint, as these can be handled only by mtr_commit. */
-UNIV_INTERN
-void
-mtr_rollback_to_savepoint(
-/*======================*/
-	mtr_t*	mtr,		/*!< in: mtr */
-	ulint	savepoint)	/*!< in: savepoint */
-{
-	mtr_memo_slot_t* slot;
-	dyn_array_t*	memo;
-	ulint		offset;
-
-	ut_ad(mtr);
-	ut_ad(mtr->magic_n == MTR_MAGIC_N);
-	ut_ad(mtr->state == MTR_ACTIVE);
-
-	memo = &(mtr->memo);
-
-	offset = dyn_array_get_data_size(memo);
-	ut_ad(offset >= savepoint);
-
-	while (offset > savepoint) {
-		offset -= sizeof(mtr_memo_slot_t);
-
-		slot = dyn_array_get_element(memo, offset);
-
-		ut_ad(slot->type != MTR_MEMO_MODIFY);
-		mtr_memo_slot_release(mtr, slot);
-	}
-}
-
 /***************************************************//**
 Releases an object in the memo stack. */
 UNIV_INTERN
diff --git a/storage/xtradb/os/os0file.c b/storage/xtradb/os/os0file.c
index ee164bd6317..ef20869c4c5 100644
--- a/storage/xtradb/os/os0file.c
+++ b/storage/xtradb/os/os0file.c
@@ -2017,7 +2017,7 @@ os_file_set_size(
 
 	ut_free(buf2);
 
-	ret = os_file_flush(file);
+	ret = os_file_flush(file, TRUE);
 
 	if (ret) {
 		return(TRUE);
@@ -2055,7 +2055,8 @@ static
 int
 os_file_fsync(
 /*==========*/
-	os_file_t	file)	/*!< in: handle to a file */
+	os_file_t	file,	/*!< in: handle to a file */
+	ibool		metadata)
 {
 	int	ret;
 	int	failures;
@@ -2064,7 +2065,16 @@ os_file_fsync(
 	failures = 0;
 
 	do {
+#if defined(HAVE_FDATASYNC) && HAVE_DECL_FDATASYNC
+		if (metadata) {
+			ret = fsync(file);
+		} else {
+			ret = fdatasync(file);
+		}
+#else
+		(void) metadata;
 		ret = fsync(file);
+#endif
 
 		os_n_fsyncs++;
 
@@ -2083,6 +2093,9 @@ os_file_fsync(
 			failures++;
 
 			retry = TRUE;
+		} else if (ret == -1 && errno == EINTR) {
+			/* Handle signal interruptions correctly */
+			retry = TRUE;
 		} else {
 
 			retry = FALSE;
@@ -2100,7 +2113,8 @@ UNIV_INTERN
 ibool
 os_file_flush(
 /*==========*/
-	os_file_t	file)	/*!< in, own: handle to a file */
+	os_file_t	file,	/*!< in, own: handle to a file */
+	ibool		metadata)
 {
 #ifdef __WIN__
 	BOOL	ret;
@@ -2150,18 +2164,18 @@ os_file_flush(
 		/* If we are not on an operating system that supports this,
 		then fall back to a plain fsync. */
 
-		ret = os_file_fsync(file);
+		ret = os_file_fsync(file, metadata);
 	} else {
 		ret = fcntl(file, F_FULLFSYNC, NULL);
 
 		if (ret) {
 			/* If we are not on a file system that supports this,
 			then fall back to a plain fsync. */
-			ret = os_file_fsync(file);
+			ret = os_file_fsync(file, metadata);
 		}
 	}
 #else
-	ret = os_file_fsync(file);
+	ret = os_file_fsync(file, metadata);
 #endif
 
 	if (ret == 0) {
@@ -2214,6 +2228,7 @@ _os_file_pread(
 	off_t	offs;
 #if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD)
 	ssize_t	n_bytes;
+	ssize_t n_read;
 #endif /* HAVE_PREAD && !HAVE_BROKEN_PREAD */
 	ulint		sec;
 	ulint		ms;
@@ -2254,7 +2269,18 @@ _os_file_pread(
 	os_n_pending_reads++;
 	os_mutex_exit(os_file_count_mutex);
 
-	n_bytes = pread(file, buf, (ssize_t)n, offs);
+	/* Handle signal interruptions correctly */
+	for (n_bytes = 0; n_bytes < (ssize_t) n; ) {
+		n_read = pread(file, buf, (ssize_t)n, offs);
+		if (n_read > 0) {
+			n_bytes += n_read;
+			offs += n_read;
+		} else if (n_read == -1 && errno == EINTR) {
+			continue;
+		} else {
+			break;
+		}
+	}
 
 	os_mutex_enter(os_file_count_mutex);
 	os_file_n_pending_preads--;
@@ -2273,6 +2299,7 @@ _os_file_pread(
 	{
 		off_t	ret_offset;
 		ssize_t	ret;
+		ssize_t n_read;
 #ifndef UNIV_HOTBACKUP
 		ulint	i;
 #endif /* !UNIV_HOTBACKUP */
@@ -2293,7 +2320,17 @@ _os_file_pread(
 		if (ret_offset < 0) {
 			ret = -1;
 		} else {
-			ret = read(file, buf, (ssize_t)n);
+			/* Handle signal interruptions correctly */
+			for (ret = 0; ret < (ssize_t) n; ) {
+				n_read = read(file, buf, (ssize_t)n);
+				if (n_read > 0) {
+					ret += n_read;
+				} else if (n_read == -1 && errno == EINTR) {
+					continue;
+				} else {
+					break;
+				}
+			}
 		}
 
 #ifndef UNIV_HOTBACKUP
@@ -2332,6 +2369,7 @@ os_file_pwrite(
 				offset */
 {
 	ssize_t	ret;
+	ssize_t n_written;
 	off_t	offs;
 
 	ut_a((offset & 0xFFFFFFFFUL) == offset);
@@ -2359,7 +2397,18 @@ os_file_pwrite(
 	os_n_pending_writes++;
 	os_mutex_exit(os_file_count_mutex);
 
-	ret = pwrite(file, buf, (ssize_t)n, offs);
+	/* Handle signal interruptions correctly */
+	for (ret = 0; ret < (ssize_t) n; ) {
+		n_written = pwrite(file, buf, (ssize_t)n, offs);
+		if (n_written > 0) {
+			ret += n_written;
+			offs += n_written;
+		} else if (n_written == -1 && errno == EINTR) {
+			continue;
+		} else {
+			break;
+		}
+	}
 
 	os_mutex_enter(os_file_count_mutex);
 	os_file_n_pending_pwrites--;
@@ -2375,7 +2424,7 @@ os_file_pwrite(
 		the OS crashes, a database page is only partially
 		physically written to disk. */
 
-		ut_a(TRUE == os_file_flush(file));
+		ut_a(TRUE == os_file_flush(file, TRUE));
 	}
 # endif /* UNIV_DO_FLUSH */
 
@@ -2406,7 +2455,17 @@ os_file_pwrite(
 			goto func_exit;
 		}
 
-		ret = write(file, buf, (ssize_t)n);
+		/* Handle signal interruptions correctly */
+		for (ret = 0; ret < (ssize_t) n; ) {
+			n_written = write(file, buf, (ssize_t)n);
+			if (n_written > 0) {
+				ret += n_written;
+			} else if (n_written == -1 && errno == EINTR) {
+				continue;
+			} else {
+				break;
+			}
+		}
 
 # ifdef UNIV_DO_FLUSH
 		if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC
@@ -2417,7 +2476,7 @@ os_file_pwrite(
 			the OS crashes, a database page is only partially
 			physically written to disk. */
 
-			ut_a(TRUE == os_file_flush(file));
+			ut_a(TRUE == os_file_flush(file, TRUE));
 		}
 # endif /* UNIV_DO_FLUSH */
 
@@ -3866,7 +3925,7 @@ os_aio_windows_handle(
 #ifdef UNIV_DO_FLUSH
 		if (slot->type == OS_FILE_WRITE
 		    && !os_do_not_call_flush_at_each_write) {
-			ut_a(TRUE == os_file_flush(slot->file));
+			ut_a(TRUE == os_file_flush(slot->file, TRUE));
 		}
 #endif /* UNIV_DO_FLUSH */
 	} else if (os_file_handle_error(slot->name, "Windows aio")) {
diff --git a/storage/xtradb/os/os0proc.c b/storage/xtradb/os/os0proc.c
index 4567d96b6f4..48922886f23 100644
--- a/storage/xtradb/os/os0proc.c
+++ b/storage/xtradb/os/os0proc.c
@@ -229,173 +229,3 @@ os_mem_free_large(
 	}
 #endif
 }
-
-/****************************************************************//**
-Allocates or attaches and reuses shared memory segment.
-The content is not cleared automatically.
-@return	allocated memory */
-UNIV_INTERN
-void*
-os_shm_alloc(
-/*=========*/
-	ulint*	n,			/*!< in/out: number of bytes */
-	uint	key,
-	ibool*	is_new)
-{
-	void*	ptr;
-#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H
-	ulint	size;
-	int	shmid;
-
-	*is_new = FALSE;
-	fprintf(stderr,
-		"InnoDB: The shared memory segment containing the buffer pool is: key  %#x (%d).\n",
-		key, key);
-# if defined HAVE_LARGE_PAGES && defined UNIV_LINUX
-	if (!os_use_large_pages || !os_large_page_size) {
-		goto skip;
-	}
-
-	/* Align block size to os_large_page_size */
-	ut_ad(ut_is_2pow(os_large_page_size));
-	size = ut_2pow_round(*n + (os_large_page_size - 1),
-			     os_large_page_size);
-
-	shmid = shmget((key_t)key, (size_t)size,
-			IPC_CREAT | IPC_EXCL | SHM_HUGETLB | SHM_R | SHM_W);
-	if (shmid < 0) {
-		if (errno == EEXIST) {
-			fprintf(stderr,
-				"InnoDB: HugeTLB: The shared memory segment exists.\n");
-			shmid = shmget((key_t)key, (size_t)size,
-					SHM_HUGETLB | SHM_R | SHM_W);
-			if (shmid < 0) {
-				fprintf(stderr,
-					"InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. (reuse) errno %d\n",
-					size, errno);
-				goto skip;
-			} else {
-				fprintf(stderr,
-					"InnoDB: HugeTLB: The existent shared memory segment is used.\n");
-			}
-		} else {
-			fprintf(stderr,
-				"InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. (new) errno %d\n",
-				size, errno);
-			goto skip;
-		}
-	} else {
-		*is_new = TRUE;
-		fprintf(stderr,
-			"InnoDB: HugeTLB: A new shared memory segment has been created .\n");
-	}
-
-	ptr = shmat(shmid, NULL, 0);
-	if (ptr == (void *)-1) {
-		fprintf(stderr,
-			"InnoDB: HugeTLB: Warning: Failed to attach shared memory segment, errno %d\n",
-			errno);
-		ptr = NULL;
-	}
-
-	if (ptr) {
-		*n = size;
-		os_fast_mutex_lock(&ut_list_mutex);
-		ut_total_allocated_memory += size;
-		os_fast_mutex_unlock(&ut_list_mutex);
-		UNIV_MEM_ALLOC(ptr, size);
-		return(ptr);
-	}
-skip:
-	*is_new = FALSE;
-# endif /* HAVE_LARGE_PAGES && defined UNIV_LINUX */
-# ifdef HAVE_GETPAGESIZE
-	size = getpagesize();
-# else
-	size = UNIV_PAGE_SIZE;
-# endif
-	/* Align block size to system page size */
-	ut_ad(ut_is_2pow(size));
-	size = *n = ut_2pow_round(*n + (size - 1), size);
-
-	shmid = shmget((key_t)key, (size_t)size,
-			IPC_CREAT | IPC_EXCL | SHM_R | SHM_W);
-	if (shmid < 0) {
-		if (errno == EEXIST) {
-			fprintf(stderr,
-				"InnoDB: A shared memory segment containing the buffer pool seems to already exist.\n");
-			shmid = shmget((key_t)key, (size_t)size,
-					SHM_R | SHM_W);
-			if (shmid < 0) {
-				fprintf(stderr,
-					"InnoDB: Warning: Failed to allocate %lu bytes. (reuse) errno %d\n",
-					size, errno);
-				ptr = NULL;
-				goto end;
-			} else {
-				fprintf(stderr,
-					"InnoDB: The existent shared memory segment is used.\n");
-			}
-		} else {
-			fprintf(stderr,
-				"InnoDB: Warning: Failed to allocate %lu bytes. (new) errno %d\n",
-				size, errno);
-			ptr = NULL;
-			goto end;
-		}
-	} else {
-		*is_new = TRUE;
-		fprintf(stderr,
-			"InnoDB: A new shared memory segment has been created.\n");
-	}
-
-	ptr = shmat(shmid, NULL, 0);
-	if (ptr == (void *)-1) {
-		fprintf(stderr,
-			"InnoDB: Warning: Failed to attach shared memory segment, errno %d\n",
-			errno);
-		ptr = NULL;
-	}
-
-	if (ptr) {
-		*n = size;
-		os_fast_mutex_lock(&ut_list_mutex);
-		ut_total_allocated_memory += size;
-		os_fast_mutex_unlock(&ut_list_mutex);
-		UNIV_MEM_ALLOC(ptr, size);
-	}
-end:
-#else /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
-	fprintf(stderr, "InnoDB: shared memory segment is not supported.\n");
-	ptr = NULL;
-#endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
-	return(ptr);
-}
-
-/****************************************************************//**
-Detach shared memory segment. */
-UNIV_INTERN
-void
-os_shm_free(
-/*========*/
-	void	*ptr,			/*!< in: pointer returned by
-					os_shm_alloc() */
-	ulint	size)			/*!< in: size returned by
-					os_shm_alloc() */
-{
-	os_fast_mutex_lock(&ut_list_mutex);
-	ut_a(ut_total_allocated_memory >= size);
-	os_fast_mutex_unlock(&ut_list_mutex);
-
-#if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H
-	if (!shmdt(ptr)) {
-		os_fast_mutex_lock(&ut_list_mutex);
-		ut_a(ut_total_allocated_memory >= size);
-		ut_total_allocated_memory -= size;
-		os_fast_mutex_unlock(&ut_list_mutex);
-		UNIV_MEM_FREE(ptr, size);
-	}
-#else /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
-	fprintf(stderr, "InnoDB: shared memory segment is not supported.\n");
-#endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
-}
diff --git a/storage/xtradb/page/page0cur.c b/storage/xtradb/page/page0cur.c
index f10f16a7dd9..b8c492328e8 100644
--- a/storage/xtradb/page/page0cur.c
+++ b/storage/xtradb/page/page0cur.c
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -1149,6 +1149,8 @@ use_heap:
 					      current_rec, index, mtr);
 	}
 
+	btr_blob_dbg_add_rec(insert_rec, index, offsets, "insert");
+
 	return(insert_rec);
 }
 
@@ -1178,14 +1180,15 @@ page_cur_insert_rec_zip_reorg(
 	/* Before trying to reorganize the page,
 	store the number of preceding records on the page. */
 	pos = page_rec_get_n_recs_before(rec);
+	ut_ad(pos > 0);
 
 	if (page_zip_reorganize(block, index, mtr)) {
 		/* The page was reorganized: Find rec by seeking to pos,
 		and update *current_rec. */
-		rec = page + PAGE_NEW_INFIMUM;
-
-		while (--pos) {
-			rec = page + rec_get_next_offs(rec, TRUE);
+		if (pos > 1) {
+			rec = page_rec_get_nth(page, pos - 1);
+		} else {
+			rec = page + PAGE_NEW_INFIMUM;
 		}
 
 		*current_rec = rec;
@@ -1195,10 +1198,12 @@ page_cur_insert_rec_zip_reorg(
 	}
 
 	/* Out of space: restore the page */
+	btr_blob_dbg_remove(page, index, "insert_zip_fail");
 	if (!page_zip_decompress(page_zip, page, FALSE)) {
 		ut_error; /* Memory corrupted? */
 	}
 	ut_ad(page_validate(page, index));
+	btr_blob_dbg_add(page, index, "insert_zip_fail");
 	return(NULL);
 }
 
@@ -1279,6 +1284,12 @@ page_cur_insert_rec_zip(
 			insert_rec = page_cur_insert_rec_zip_reorg(
 				current_rec, block, index, insert_rec,
 				page, page_zip, mtr);
+#ifdef UNIV_DEBUG
+			if (insert_rec) {
+				rec_offs_make_valid(
+					insert_rec, index, offsets);
+			}
+#endif /* UNIV_DEBUG */
 		}
 
 		return(insert_rec);
@@ -1490,6 +1501,8 @@ use_heap:
 
 	page_zip_write_rec(page_zip, insert_rec, index, offsets, 1);
 
+	btr_blob_dbg_add_rec(insert_rec, index, offsets, "insert_zip_ok");
+
 	/* 9. Write log record of the insert */
 	if (UNIV_LIKELY(mtr != NULL)) {
 		page_cur_insert_rec_write_log(insert_rec, rec_size,
@@ -1697,6 +1710,9 @@ page_copy_rec_list_end_to_created_page(
 
 		heap_top += rec_size;
 
+		rec_offs_make_valid(insert_rec, index, offsets);
+		btr_blob_dbg_add_rec(insert_rec, index, offsets, "copy_end");
+
 		page_cur_insert_rec_write_log(insert_rec, rec_size, prev_rec,
 					      index, mtr);
 		prev_rec = insert_rec;
@@ -1944,6 +1960,7 @@ page_cur_delete_rec(
 	page_dir_slot_set_n_owned(cur_dir_slot, page_zip, cur_n_owned - 1);
 
 	/* 6. Free the memory occupied by the record */
+	btr_blob_dbg_remove_rec(current_rec, index, offsets, "delete");
 	page_mem_free(page, page_zip, current_rec, index, offsets);
 
 	/* 7. Now we have decremented the number of owned records of the slot.
diff --git a/storage/xtradb/page/page0page.c b/storage/xtradb/page/page0page.c
index 10008f9ac25..a284b1480a3 100644
--- a/storage/xtradb/page/page0page.c
+++ b/storage/xtradb/page/page0page.c
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -685,12 +685,16 @@ page_copy_rec_list_end(
 			if (UNIV_UNLIKELY
 			    (!page_zip_reorganize(new_block, index, mtr))) {
 
+				btr_blob_dbg_remove(new_page, index,
+						    "copy_end_reorg_fail");
 				if (UNIV_UNLIKELY
 				    (!page_zip_decompress(new_page_zip,
 							  new_page, FALSE))) {
 					ut_error;
 				}
 				ut_ad(page_validate(new_page, index));
+				btr_blob_dbg_add(new_page, index,
+						 "copy_end_reorg_fail");
 				return(NULL);
 			} else {
 				/* The page was reorganized:
@@ -803,12 +807,16 @@ page_copy_rec_list_start(
 			if (UNIV_UNLIKELY
 			    (!page_zip_reorganize(new_block, index, mtr))) {
 
+				btr_blob_dbg_remove(new_page, index,
+						    "copy_start_reorg_fail");
 				if (UNIV_UNLIKELY
 				    (!page_zip_decompress(new_page_zip,
 							  new_page, FALSE))) {
 					ut_error;
 				}
 				ut_ad(page_validate(new_page, index));
+				btr_blob_dbg_add(new_page, index,
+						 "copy_start_reorg_fail");
 				return(NULL);
 			} else {
 				/* The page was reorganized:
@@ -1080,6 +1088,9 @@ page_delete_rec_list_end(
 	/* Remove the record chain segment from the record chain */
 	page_rec_set_next(prev_rec, page_get_supremum_rec(page));
 
+	btr_blob_dbg_op(page, rec, index, "delete_end",
+			btr_blob_dbg_remove_rec);
+
 	/* Catenate the deleted chain segment to the page free list */
 
 	page_rec_set_next(last_rec, page_header_get_ptr(page, PAGE_FREE));
@@ -1476,55 +1487,54 @@ page_dir_balance_slot(
 	}
 }
 
-#ifndef UNIV_HOTBACKUP
 /************************************************************//**
-Returns the middle record of the record list. If there are an even number
-of records in the list, returns the first record of the upper half-list.
-@return	middle record */
+Returns the nth record of the record list.
+This is the inverse function of page_rec_get_n_recs_before().
+@return	nth record */
 UNIV_INTERN
-rec_t*
-page_get_middle_rec(
-/*================*/
-	page_t*	page)	/*!< in: page */
+const rec_t*
+page_rec_get_nth_const(
+/*===================*/
+	const page_t*	page,	/*!< in: page */
+	ulint		nth)	/*!< in: nth record */
 {
-	page_dir_slot_t*	slot;
-	ulint			middle;
+	const page_dir_slot_t*	slot;
 	ulint			i;
 	ulint			n_owned;
-	ulint			count;
-	rec_t*			rec;
+	const rec_t*		rec;
 
-	/* This many records we must leave behind */
-	middle = (page_get_n_recs(page) + PAGE_HEAP_NO_USER_LOW) / 2;
-
-	count = 0;
+	ut_ad(nth < UNIV_PAGE_SIZE / (REC_N_NEW_EXTRA_BYTES + 1));
 
 	for (i = 0;; i++) {
 
 		slot = page_dir_get_nth_slot(page, i);
 		n_owned = page_dir_slot_get_n_owned(slot);
 
-		if (count + n_owned > middle) {
+		if (n_owned > nth) {
 			break;
 		} else {
-			count += n_owned;
+			nth -= n_owned;
 		}
 	}
 
 	ut_ad(i > 0);
 	slot = page_dir_get_nth_slot(page, i - 1);
-	rec = (rec_t*) page_dir_slot_get_rec(slot);
-	rec = page_rec_get_next(rec);
-
-	/* There are now count records behind rec */
+	rec = page_dir_slot_get_rec(slot);
 
-	for (i = 0; i < middle - count; i++) {
-		rec = page_rec_get_next(rec);
+	if (page_is_comp(page)) {
+		do {
+			rec = page_rec_get_next_low(rec, TRUE);
+			ut_ad(rec);
+		} while (nth--);
+	} else {
+		do {
+			rec = page_rec_get_next_low(rec, FALSE);
+			ut_ad(rec);
+		} while (nth--);
 	}
 
 	return(rec);
 }
-#endif /* !UNIV_HOTBACKUP */
 
 /***************************************************************//**
 Returns the number of records before the given record in chain.
@@ -1586,6 +1596,7 @@ page_rec_get_n_recs_before(
 	n--;
 
 	ut_ad(n >= 0);
+	ut_ad(n < UNIV_PAGE_SIZE / (REC_N_NEW_EXTRA_BYTES + 1));
 
 	return((ulint) n);
 }
diff --git a/storage/xtradb/page/page0zip.c b/storage/xtradb/page/page0zip.c
index 5b4f5d3b76a..3d5c5a226c7 100644
--- a/storage/xtradb/page/page0zip.c
+++ b/storage/xtradb/page/page0zip.c
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2005, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -151,6 +151,20 @@ page_zip_empty_size(
 #endif /* !UNIV_HOTBACKUP */
 
 /*************************************************************//**
+Gets the number of elements in the dense page directory,
+including deleted records (the free list).
+@return	number of elements in the dense page directory */
+UNIV_INLINE
+ulint
+page_zip_dir_elems(
+/*===============*/
+	const page_zip_des_t*	page_zip)	/*!< in: compressed page */
+{
+	/* Exclude the page infimum and supremum from the record count. */
+	return(page_dir_get_n_heap(page_zip->data) - PAGE_HEAP_NO_USER_LOW);
+}
+
+/*************************************************************//**
 Gets the size of the compressed page trailer (the dense page directory),
 including deleted records (the free list).
 @return	length of dense page directory, in bytes */
@@ -160,14 +174,42 @@ page_zip_dir_size(
 /*==============*/
 	const page_zip_des_t*	page_zip)	/*!< in: compressed page */
 {
-	/* Exclude the page infimum and supremum from the record count. */
-	ulint	size = PAGE_ZIP_DIR_SLOT_SIZE
-		* (page_dir_get_n_heap(page_zip->data)
-		   - PAGE_HEAP_NO_USER_LOW);
-	return(size);
+	return(PAGE_ZIP_DIR_SLOT_SIZE * page_zip_dir_elems(page_zip));
+}
+
+/*************************************************************//**
+Gets an offset to the compressed page trailer (the dense page directory),
+including deleted records (the free list).
+@return	offset of the dense page directory */
+UNIV_INLINE
+ulint
+page_zip_dir_start_offs(
+/*====================*/
+	const page_zip_des_t*	page_zip,	/*!< in: compressed page */
+	ulint			n_dense)	/*!< in: directory size */
+{
+	ut_ad(n_dense * PAGE_ZIP_DIR_SLOT_SIZE < page_zip_get_size(page_zip));
+
+	return(page_zip_get_size(page_zip) - n_dense * PAGE_ZIP_DIR_SLOT_SIZE);
 }
 
 /*************************************************************//**
+Gets a pointer to the compressed page trailer (the dense page directory),
+including deleted records (the free list).
+@param[in] page_zip	compressed page
+@param[in] n_dense	number of entries in the directory
+@return	pointer to the dense page directory */
+#define page_zip_dir_start_low(page_zip, n_dense)			\
+	((page_zip)->data + page_zip_dir_start_offs(page_zip, n_dense))
+/*************************************************************//**
+Gets a pointer to the compressed page trailer (the dense page directory),
+including deleted records (the free list).
+@param[in] page_zip	compressed page
+@return	pointer to the dense page directory */
+#define page_zip_dir_start(page_zip)					\
+	page_zip_dir_start_low(page_zip, page_zip_dir_elems(page_zip))
+
+/*************************************************************//**
 Gets the size of the compressed page trailer (the dense page directory),
 only including user records (excluding the free list).
 @return	length of dense page directory comprising existing records, in bytes */
@@ -653,13 +695,13 @@ page_zip_dir_encode(
 Allocate memory for zlib. */
 static
 void*
-page_zip_malloc(
+page_zip_zalloc(
 /*============*/
 	void*	opaque,	/*!< in/out: memory heap */
 	uInt	items,	/*!< in: number of items to allocate */
 	uInt	size)	/*!< in: size of an item in bytes */
 {
-	return(mem_heap_alloc(opaque, items * size));
+	return(mem_heap_zalloc(opaque, items * size));
 }
 
 /**********************************************************************//**
@@ -684,7 +726,7 @@ page_zip_set_alloc(
 {
 	z_stream*	strm = stream;
 
-	strm->zalloc = page_zip_malloc;
+	strm->zalloc = page_zip_zalloc;
 	strm->zfree = page_zip_free;
 	strm->opaque = heap;
 }
@@ -2246,8 +2288,7 @@ zlib_done:
 	}
 
 	/* Restore the uncompressed columns in heap_no order. */
-	storage	= page_zip->data + page_zip_get_size(page_zip)
-		- n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
+	storage = page_zip_dir_start_low(page_zip, n_dense);
 
 	for (slot = 0; slot < n_dense; slot++) {
 		rec_t*		rec	= recs[slot];
@@ -2732,8 +2773,7 @@ zlib_done:
 		return(FALSE);
 	}
 
-	storage = page_zip->data + page_zip_get_size(page_zip)
-		- n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
+	storage = page_zip_dir_start_low(page_zip, n_dense);
 
 	externs = storage - n_dense
 		* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
@@ -2916,19 +2956,18 @@ zlib_error:
 
 	page_zip_set_alloc(&d_stream, heap);
 
-	if (UNIV_UNLIKELY(inflateInit2(&d_stream, UNIV_PAGE_SIZE_SHIFT)
-			  != Z_OK)) {
-		ut_error;
-	}
-
 	d_stream.next_in = page_zip->data + PAGE_DATA;
 	/* Subtract the space reserved for
 	the page header and the end marker of the modification log. */
 	d_stream.avail_in = page_zip_get_size(page_zip) - (PAGE_DATA + 1);
-
 	d_stream.next_out = page + PAGE_ZIP_START;
 	d_stream.avail_out = UNIV_PAGE_SIZE - PAGE_ZIP_START;
 
+	if (UNIV_UNLIKELY(inflateInit2(&d_stream, UNIV_PAGE_SIZE_SHIFT)
+			  != Z_OK)) {
+		ut_error;
+	}
+
 	/* Decode the zlib header and the index information. */
 	if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) {
 
@@ -3462,9 +3501,7 @@ page_zip_write_rec(
 	}
 
 	/* Write the data bytes.  Store the uncompressed bytes separately. */
-	storage = page_zip->data + page_zip_get_size(page_zip)
-		- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
-		* PAGE_ZIP_DIR_SLOT_SIZE;
+	storage = page_zip_dir_start(page_zip);
 
 	if (page_is_leaf(page)) {
 		ulint		len;
@@ -3760,9 +3797,7 @@ corrupt:
 		field = page + offset;
 		storage = page_zip->data + z_offset;
 
-		storage_end = page_zip->data + page_zip_get_size(page_zip)
-			- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
-			* PAGE_ZIP_DIR_SLOT_SIZE;
+		storage_end = page_zip_dir_start(page_zip);
 
 		heap_no = 1 + (storage_end - storage) / REC_NODE_PTR_SIZE;
 
@@ -3798,7 +3833,9 @@ page_zip_write_node_ptr(
 {
 	byte*	field;
 	byte*	storage;
+#ifdef UNIV_DEBUG
 	page_t*	page	= page_align(rec);
+#endif /* UNIV_DEBUG */
 
 	ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
 	ut_ad(page_simple_validate_new(page));
@@ -3815,9 +3852,7 @@ page_zip_write_node_ptr(
 	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
 	UNIV_MEM_ASSERT_RW(rec, size);
 
-	storage = page_zip->data + page_zip_get_size(page_zip)
-		- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
-		* PAGE_ZIP_DIR_SLOT_SIZE
+	storage = page_zip_dir_start(page_zip)
 		- (rec_get_heap_no_new(rec) - 1) * REC_NODE_PTR_SIZE;
 	field = rec + size - REC_NODE_PTR_SIZE;
 
@@ -3866,7 +3901,9 @@ page_zip_write_trx_id_and_roll_ptr(
 {
 	byte*	field;
 	byte*	storage;
+#ifdef UNIV_DEBUG
 	page_t*	page	= page_align(rec);
+#endif /* UNIV_DEBUG */
 	ulint	len;
 
 	ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
@@ -3884,9 +3921,7 @@ page_zip_write_trx_id_and_roll_ptr(
 
 	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
 
-	storage = page_zip->data + page_zip_get_size(page_zip)
-		- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
-		* PAGE_ZIP_DIR_SLOT_SIZE
+	storage = page_zip_dir_start(page_zip)
 		- (rec_get_heap_no_new(rec) - 1)
 		* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
 
@@ -3917,17 +3952,9 @@ page_zip_write_trx_id_and_roll_ptr(
 	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
 }
 
-#ifdef UNIV_ZIP_DEBUG
-/** Set this variable in a debugger to disable page_zip_clear_rec().
-The only observable effect should be the compression ratio due to
-deleted records not being zeroed out.  In rare cases, there can be
-page_zip_validate() failures on the node_ptr, trx_id and roll_ptr
-columns if the space is reallocated for a smaller record. */
-UNIV_INTERN ibool	page_zip_clear_rec_disable;
-#endif /* UNIV_ZIP_DEBUG */
-
 /**********************************************************************//**
-Clear an area on the uncompressed and compressed page, if possible. */
+Clear an area on the uncompressed and compressed page.
+Do not clear the data payload, as that would grow the modification log. */
 static
 void
 page_zip_clear_rec(
@@ -3939,6 +3966,9 @@ page_zip_clear_rec(
 {
 	ulint	heap_no;
 	page_t*	page	= page_align(rec);
+	byte*	storage;
+	byte*	field;
+	ulint	len;
 	/* page_zip_validate() would fail here if a record
 	containing externally stored columns is being deleted. */
 	ut_ad(rec_offs_validate(rec, index, offsets));
@@ -3954,60 +3984,38 @@ page_zip_clear_rec(
 	UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
 			   rec_offs_extra_size(offsets));
 
-	if (
-#ifdef UNIV_ZIP_DEBUG
-	    !page_zip_clear_rec_disable &&
-#endif /* UNIV_ZIP_DEBUG */
-	    page_zip->m_end
-	    + 1 + ((heap_no - 1) >= 64)/* size of the log entry */
-	    + page_zip_get_trailer_len(page_zip,
-				       dict_index_is_clust(index), NULL)
-	    < page_zip_get_size(page_zip)) {
-		byte*	data;
-
-		/* Clear only the data bytes, because the allocator and
-		the decompressor depend on the extra bytes. */
-		memset(rec, 0, rec_offs_data_size(offsets));
-
-		if (!page_is_leaf(page)) {
-			/* Clear node_ptr on the compressed page. */
-			byte*	storage	= page_zip->data
-				+ page_zip_get_size(page_zip)
-				- (page_dir_get_n_heap(page)
-				   - PAGE_HEAP_NO_USER_LOW)
-				* PAGE_ZIP_DIR_SLOT_SIZE;
-
-			memset(storage - (heap_no - 1) * REC_NODE_PTR_SIZE,
-			       0, REC_NODE_PTR_SIZE);
-		} else if (dict_index_is_clust(index)) {
-			/* Clear trx_id and roll_ptr on the compressed page. */
-			byte*	storage	= page_zip->data
-				+ page_zip_get_size(page_zip)
-				- (page_dir_get_n_heap(page)
-				   - PAGE_HEAP_NO_USER_LOW)
-				* PAGE_ZIP_DIR_SLOT_SIZE;
-
-			memset(storage - (heap_no - 1)
-			       * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
-			       0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
-		}
+	if (!page_is_leaf(page)) {
+		/* Clear node_ptr. On the compressed page,
+		there is an array of node_ptr immediately before the
+		dense page directory, at the very end of the page. */
+		storage	= page_zip_dir_start(page_zip);
+		ut_ad(dict_index_get_n_unique_in_tree(index) ==
+		      rec_offs_n_fields(offsets) - 1);
+		field	= rec_get_nth_field(rec, offsets,
+					    rec_offs_n_fields(offsets) - 1,
+					    &len);
+		ut_ad(len == REC_NODE_PTR_SIZE);
 
-		/* Log that the data was zeroed out. */
-		data = page_zip->data + page_zip->m_end;
-		ut_ad(!*data);
-		if (UNIV_UNLIKELY(heap_no - 1 >= 64)) {
-			*data++ = (byte) (0x80 | (heap_no - 1) >> 7);
-			ut_ad(!*data);
-		}
-		*data++ = (byte) ((heap_no - 1) << 1 | 1);
-		ut_ad(!*data);
-		ut_ad((ulint) (data - page_zip->data)
-		      < page_zip_get_size(page_zip));
-		page_zip->m_end = data - page_zip->data;
-		page_zip->m_nonempty = TRUE;
-	} else if (page_is_leaf(page) && dict_index_is_clust(index)) {
-		/* Do not clear the record, because there is not enough space
-		to log the operation. */
+		ut_ad(!rec_offs_any_extern(offsets));
+		memset(field, 0, REC_NODE_PTR_SIZE);
+		memset(storage - (heap_no - 1) * REC_NODE_PTR_SIZE,
+		       0, REC_NODE_PTR_SIZE);
+	} else if (dict_index_is_clust(index)) {
+		/* Clear trx_id and roll_ptr. On the compressed page,
+		there is an array of these fields immediately before the
+		dense page directory, at the very end of the page. */
+		const ulint	trx_id_pos
+			= dict_col_get_clust_pos(
+			dict_table_get_sys_col(
+				index->table, DATA_TRX_ID), index);
+		storage	= page_zip_dir_start(page_zip);
+		field	= rec_get_nth_field(rec, offsets, trx_id_pos, &len);
+		ut_ad(len == DATA_TRX_ID_LEN);
+
+		memset(field, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+		memset(storage - (heap_no - 1)
+		       * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
+		       0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
 
 		if (rec_offs_any_extern(offsets)) {
 			ulint	i;
@@ -4016,15 +4024,18 @@ page_zip_clear_rec(
 				/* Clear all BLOB pointers in order to make
 				page_zip_validate() pass. */
 				if (rec_offs_nth_extern(offsets, i)) {
-					ulint	len;
-					byte*	field = rec_get_nth_field(
+					field = rec_get_nth_field(
 						rec, offsets, i, &len);
+					ut_ad(len
+					      == BTR_EXTERN_FIELD_REF_SIZE);
 					memset(field + len
 					       - BTR_EXTERN_FIELD_REF_SIZE,
 					       0, BTR_EXTERN_FIELD_REF_SIZE);
 				}
 			}
 		}
+	} else {
+		ut_ad(!rec_offs_any_extern(offsets));
 	}
 
 #ifdef UNIV_ZIP_DEBUG
@@ -4455,6 +4466,8 @@ page_zip_reorganize(
 	/* Copy the old page to temporary space */
 	buf_frame_copy(temp_page, page);
 
+	btr_blob_dbg_remove(page, index, "zip_reorg");
+
 	/* Recreate the page: note that global data on page (possible
 	segment headers, next page-field, etc.) is preserved intact */
 
@@ -4513,7 +4526,7 @@ page_zip_copy_recs(
 	mtr_t*			mtr)		/*!< in: mini-transaction */
 {
 	ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
-	ut_ad(mtr_memo_contains_page(mtr, (page_t*) src, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, src, MTR_MEMO_PAGE_X_FIX));
 	ut_ad(!dict_index_is_ibuf(index));
 #ifdef UNIV_ZIP_DEBUG
 	/* The B-tree operations that call this function may set
@@ -4583,6 +4596,7 @@ page_zip_copy_recs(
 #ifdef UNIV_ZIP_DEBUG
 	ut_a(page_zip_validate(page_zip, page));
 #endif /* UNIV_ZIP_DEBUG */
+	btr_blob_dbg_add(page, index, "page_zip_copy_recs");
 
 	page_zip_compress_write_log(page_zip, page, index, mtr);
 }
diff --git a/storage/xtradb/que/que0que.c b/storage/xtradb/que/que0que.c
index 9c1d61c1731..5fccbb180fe 100644
--- a/storage/xtradb/que/que0que.c
+++ b/storage/xtradb/que/que0que.c
@@ -1418,6 +1418,12 @@ que_eval_sql(
 
 	ut_a(trx->error_state == DB_SUCCESS);
 
+	if (trx->fake_changes) {
+		/* fake_changes should not access to system tables */
+		fprintf(stderr, "InnoDB: ERROR: innodb_fake_changes tried to access to system tables.\n");
+		return(DB_ERROR);
+	}
+
 	if (reserve_dict_mutex) {
 		mutex_enter(&dict_sys->mutex);
 	}
diff --git a/storage/xtradb/rem/rem0rec.c b/storage/xtradb/rem/rem0rec.c
index 37ba8ca2ffe..9f90d2940dd 100644
--- a/storage/xtradb/rem/rem0rec.c
+++ b/storage/xtradb/rem/rem0rec.c
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -408,7 +408,7 @@ rec_init_offsets(
 		do {
 			ulint	len;
 			if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
-				len = offs += 4;
+				len = offs += REC_NODE_PTR_SIZE;
 				goto resolved;
 			}
 
@@ -640,7 +640,7 @@ rec_get_offsets_reverse(
 	do {
 		ulint	len;
 		if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
-			len = offs += 4;
+			len = offs += REC_NODE_PTR_SIZE;
 			goto resolved;
 		}
 
@@ -1131,9 +1131,9 @@ rec_convert_dtuple_to_rec_comp(
 
 		if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
 			ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL);
-			ut_ad(len == 4);
+			ut_ad(len == REC_NODE_PTR_SIZE);
 			memcpy(end, dfield_get_data(field), len);
-			end += 4;
+			end += REC_NODE_PTR_SIZE;
 			break;
 		}
 
diff --git a/storage/xtradb/row/row0ins.c b/storage/xtradb/row/row0ins.c
index efcea62f212..0bcbc14fbf6 100644
--- a/storage/xtradb/row/row0ins.c
+++ b/storage/xtradb/row/row0ins.c
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -345,9 +345,9 @@ row_ins_clust_index_entry_by_modify(
 			return(DB_LOCK_TABLE_FULL);
 
 		}
-		err = btr_cur_pessimistic_update(0, cursor,
-						 heap, big_rec, update,
-						 0, thr, mtr);
+		err = btr_cur_pessimistic_update(
+			BTR_KEEP_POS_FLAG, cursor, heap, big_rec, update,
+			0, thr, mtr);
 	}
 
 	return(err);
@@ -434,11 +434,9 @@ row_ins_cascade_calc_update_vec(
 	dict_table_t*	table		= foreign->foreign_table;
 	dict_index_t*	index		= foreign->foreign_index;
 	upd_t*		update;
-	upd_field_t*	ufield;
 	dict_table_t*	parent_table;
 	dict_index_t*	parent_index;
 	upd_t*		parent_update;
-	upd_field_t*	parent_ufield;
 	ulint		n_fields_updated;
 	ulint		parent_field_no;
 	ulint		i;
@@ -474,13 +472,15 @@ row_ins_cascade_calc_update_vec(
 			dict_index_get_nth_col_no(parent_index, i));
 
 		for (j = 0; j < parent_update->n_fields; j++) {
-			parent_ufield = parent_update->fields + j;
+			const upd_field_t*	parent_ufield
+				= &parent_update->fields[j];
 
 			if (parent_ufield->field_no == parent_field_no) {
 
 				ulint			min_size;
 				const dict_col_t*	col;
 				ulint			ufield_len;
+				upd_field_t*		ufield;
 
 				col = dict_index_get_nth_col(index, i);
 
@@ -493,6 +493,8 @@ row_ins_cascade_calc_update_vec(
 				ufield->field_no
 					= dict_table_get_nth_col_pos(
 					table, dict_col_get_no(col));
+
+				ufield->orig_len = 0;
 				ufield->exp = NULL;
 
 				ufield->new_val = parent_ufield->new_val;
@@ -993,10 +995,9 @@ row_ins_foreign_check_on_constraint(
 		goto nonstandard_exit_func;
 	}
 
-	if ((node->is_delete
-	     && (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL))
-	    || (!node->is_delete
-		&& (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL))) {
+	if (node->is_delete
+	    ? (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL)
+	    : (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL)) {
 
 		/* Build the appropriate update vector which sets
 		foreign->n_fields first fields in rec to SQL NULL */
@@ -1005,6 +1006,8 @@ row_ins_foreign_check_on_constraint(
 
 		update->info_bits = 0;
 		update->n_fields = foreign->n_fields;
+		UNIV_MEM_INVALID(update->fields,
+				 update->n_fields * sizeof *update->fields);
 
 		for (i = 0; i < foreign->n_fields; i++) {
 			upd_field_t*	ufield = &update->fields[i];
@@ -1512,6 +1515,11 @@ exit_func:
 	if (UNIV_LIKELY_NULL(heap)) {
 		mem_heap_free(heap);
 	}
+
+	if (trx->fake_changes) {
+		err = DB_SUCCESS;
+	}
+
 	return(err);
 }
 
@@ -1992,6 +2000,7 @@ row_ins_index_entry_low(
 	ulint		modify = 0; /* remove warning */
 	rec_t*		insert_rec;
 	rec_t*		rec;
+	ulint*		offsets;
 	ulint		err;
 	ulint		n_unique;
 	big_rec_t*	big_rec			= NULL;
@@ -2013,7 +2022,7 @@ row_ins_index_entry_low(
 	}
 
 	btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
-				    mode | BTR_INSERT | ignore_sec_unique,
+				    thr_get_trx(thr)->fake_changes ? BTR_SEARCH_LEAF : (mode | BTR_INSERT | ignore_sec_unique),
 				    &cursor, 0, __FILE__, __LINE__, &mtr);
 
 	if (cursor.flag == BTR_CUR_INSERT_TO_IBUF) {
@@ -2073,7 +2082,7 @@ row_ins_index_entry_low(
 
 			btr_cur_search_to_nth_level(index, 0, entry,
 						    PAGE_CUR_LE,
-						    mode | BTR_INSERT,
+						    thr_get_trx(thr)->fake_changes ? BTR_SEARCH_LEAF : (mode | BTR_INSERT),
 						    &cursor, 0,
 						    __FILE__, __LINE__, &mtr);
 		}
@@ -2097,6 +2106,42 @@ row_ins_index_entry_low(
 			err = row_ins_clust_index_entry_by_modify(
 				mode, &cursor, &heap, &big_rec, entry,
 				thr, &mtr);
+
+			if (big_rec) {
+				ut_a(err == DB_SUCCESS);
+				/* Write out the externally stored
+				columns while still x-latching
+				index->lock and block->lock. We have
+				to mtr_commit(mtr) first, so that the
+				redo log will be written in the
+				correct order. Otherwise, we would run
+				into trouble on crash recovery if mtr
+				freed B-tree pages on which some of
+				the big_rec fields will be written. */
+				btr_cur_mtr_commit_and_start(&cursor, &mtr);
+
+				rec = btr_cur_get_rec(&cursor);
+				offsets = rec_get_offsets(
+					rec, index, NULL,
+					ULINT_UNDEFINED, &heap);
+
+				err = btr_store_big_rec_extern_fields(
+					index, btr_cur_get_block(&cursor),
+					rec, offsets, &mtr, FALSE, big_rec);
+				/* If writing big_rec fails (for
+				example, because of DB_OUT_OF_FILE_SPACE),
+				the record will be corrupted. Even if
+				we did not update any externally
+				stored columns, our update could cause
+				the record to grow so that a
+				non-updated column was selected for
+				external storage. This non-update
+				would not have been written to the
+				undo log, and thus the record cannot
+				be rolled back. */
+				ut_a(err == DB_SUCCESS);
+				goto stored_big_rec;
+			}
 		} else {
 			ut_ad(!n_ext);
 			err = row_ins_sec_index_entry_by_modify(
@@ -2125,8 +2170,22 @@ function_exit:
 	mtr_commit(&mtr);
 
 	if (UNIV_LIKELY_NULL(big_rec)) {
-		rec_t*	rec;
-		ulint*	offsets;
+
+		if (thr_get_trx(thr)->fake_changes) {
+			/* skip store extern */
+			if (modify) {
+				dtuple_big_rec_free(big_rec);
+			} else {
+				dtuple_convert_back_big_rec(index, entry, big_rec);
+			}
+
+			if (UNIV_LIKELY_NULL(heap)) {
+				mem_heap_free(heap);
+			}
+
+			return(err);
+		}
+
 		mtr_start(&mtr);
 
 		btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
@@ -2140,6 +2199,7 @@ function_exit:
 			index, btr_cur_get_block(&cursor),
 			rec, offsets, &mtr, FALSE, big_rec);
 
+stored_big_rec:
 		if (modify) {
 			dtuple_big_rec_free(big_rec);
 		} else {
diff --git a/storage/xtradb/row/row0mysql.c b/storage/xtradb/row/row0mysql.c
index 1b97cbb0009..cb003ad62e0 100644
--- a/storage/xtradb/row/row0mysql.c
+++ b/storage/xtradb/row/row0mysql.c
@@ -1190,6 +1190,7 @@ run_again:
 		prebuilt->table->stat_n_rows--;
 	}
 
+	if (!(trx->fake_changes))
 	row_update_statistics_if_needed(prebuilt->table);
 	trx->op_info = "";
 
@@ -1450,6 +1451,7 @@ run_again:
 	that changes indexed columns, UPDATEs that change only non-indexed
 	columns would not affect statistics. */
 	if (node->is_delete || !(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
+		if (!(trx->fake_changes))
 		row_update_statistics_if_needed(prebuilt->table);
 	}
 
@@ -1668,6 +1670,7 @@ run_again:
 		srv_n_rows_updated++;
 	}
 
+	if (!(trx->fake_changes))
 	row_update_statistics_if_needed(table);
 
 	return(err);
@@ -2556,10 +2559,29 @@ row_discard_tablespace_for_mysql(
 
 			err = DB_ERROR;
 		} else {
+			dict_index_t*	index;
+
 			/* Set the flag which tells that now it is legal to
 			IMPORT a tablespace for this table */
 			table->tablespace_discarded = TRUE;
 			table->ibd_file_missing = TRUE;
+
+			/* check adaptive hash entries */
+			index = dict_table_get_first_index(table);
+			while (index) {
+				ulint ref_count = btr_search_info_get_ref_count(index->search_info);
+				if (ref_count) {
+					fprintf(stderr, "InnoDB: Warning:"
+						" hash index ref_count (%lu) is not zero"
+						" after fil_discard_tablespace().\n"
+						"index: \"%s\""
+						" table: \"%s\"\n",
+						ref_count,
+						index->name,
+						table->name);
+				}
+				index = dict_table_get_next_index(index);
+			}
 		}
 	}
 
@@ -2597,6 +2619,11 @@ row_import_tablespace_for_mysql(
 
 	current_lsn = log_get_lsn();
 
+	/* Enlarge the fatal lock wait timeout during import. */
+	mutex_enter(&kernel_mutex);
+	srv_fatal_semaphore_wait_threshold += 7200; /* 2 hours */
+	mutex_exit(&kernel_mutex);
+
 	/* It is possible, though very improbable, that the lsn's in the
 	tablespace to be imported have risen above the current system lsn, if
 	a lengthy purge, ibuf merge, or rollback was performed on a backup
@@ -2708,6 +2735,11 @@ funct_exit:
 
 	trx->op_info = "";
 
+	/* Restore the fatal semaphore wait timeout */
+	mutex_enter(&kernel_mutex);
+	srv_fatal_semaphore_wait_threshold -= 7200; /* 2 hours */
+	mutex_exit(&kernel_mutex);
+
 	return((int) err);
 }
 
@@ -2901,6 +2933,19 @@ row_truncate_table_for_mysql(
 			table->space = space;
 			index = dict_table_get_first_index(table);
 			do {
+				ulint ref_count = btr_search_info_get_ref_count(index->search_info);
+				/* check adaptive hash entries */
+				if (ref_count) {
+					fprintf(stderr, "InnoDB: Warning:"
+						" hash index ref_count (%lu) is not zero"
+						" after fil_discard_tablespace().\n"
+						"index: \"%s\""
+						" table: \"%s\"\n",
+						ref_count,
+						index->name,
+						table->name);
+				}
+
 				index->space = space;
 				index = dict_table_get_next_index(index);
 			} while (index);
diff --git a/storage/xtradb/row/row0row.c b/storage/xtradb/row/row0row.c
index 0783d482f76..cea70e98dee 100644
--- a/storage/xtradb/row/row0row.c
+++ b/storage/xtradb/row/row0row.c
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -47,35 +47,6 @@ Created 4/20/1996 Heikki Tuuri
 #include "read0read.h"
 #include "ut0mem.h"
 
-/*********************************************************************//**
-Gets the offset of trx id field, in bytes relative to the origin of
-a clustered index record.
-@return	offset of DATA_TRX_ID */
-UNIV_INTERN
-ulint
-row_get_trx_id_offset(
-/*==================*/
-	const rec_t*	rec __attribute__((unused)),
-				/*!< in: record */
-	dict_index_t*	index,	/*!< in: clustered index */
-	const ulint*	offsets)/*!< in: rec_get_offsets(rec, index) */
-{
-	ulint	pos;
-	ulint	offset;
-	ulint	len;
-
-	ut_ad(dict_index_is_clust(index));
-	ut_ad(rec_offs_validate(rec, index, offsets));
-
-	pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
-
-	offset = rec_get_nth_field_offs(offsets, pos, &len);
-
-	ut_ad(len == DATA_TRX_ID_LEN);
-
-	return(offset);
-}
-
 /*****************************************************************//**
 When an insert or purge to a table is performed, this function builds
 the entry to be inserted into or purged from an index on the table.
@@ -130,12 +101,27 @@ row_build_index_entry(
 
 		dfield_copy(dfield, dfield2);
 
-		if (dfield_is_null(dfield) || ind_field->prefix_len == 0) {
+		if (dfield_is_null(dfield)) {
+			continue;
+		}
+
+		if (ind_field->prefix_len == 0
+		    && (!dfield_is_ext(dfield)
+			|| dict_index_is_clust(index))) {
+			/* The dfield_copy() above suffices for
+			columns that are stored in-page, or for
+			clustered index record columns that are not
+			part of a column prefix in the PRIMARY KEY. */
 			continue;
 		}
 
-		/* If a column prefix index, take only the prefix.
-		Prefix-indexed columns may be externally stored. */
+		/* If the column is stored externally (off-page) in
+		the clustered index, it must be an ordering field in
+		the secondary index.  In the Antelope format, only
+		prefix-indexed columns may be stored off-page in the
+		clustered index record. In the Barracuda format, also
+		fully indexed long CHAR or VARCHAR columns may be
+		stored off-page. */
 		ut_ad(col->ord_part);
 
 		if (UNIV_LIKELY_NULL(ext)) {
@@ -148,17 +134,41 @@ row_build_index_entry(
 				}
 				dfield_set_data(dfield, buf, len);
 			}
+
+			if (ind_field->prefix_len == 0) {
+				/* In the Barracuda format
+				(ROW_FORMAT=DYNAMIC or
+				ROW_FORMAT=COMPRESSED), we can have a
+				secondary index on an entire column
+				that is stored off-page in the
+				clustered index. As this is not a
+				prefix index (prefix_len == 0),
+				include the entire off-page column in
+				the secondary index record. */
+				continue;
+			}
 		} else if (dfield_is_ext(dfield)) {
+			/* This table is either in Antelope format
+			(ROW_FORMAT=REDUNDANT or ROW_FORMAT=COMPACT)
+			or a purge record where the ordered part of
+			the field is not external.
+			In Antelope, the maximum column prefix
+			index length is 767 bytes, and the clustered
+			index record contains a 768-byte prefix of
+			each off-page column. */
 			ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
 			len -= BTR_EXTERN_FIELD_REF_SIZE;
-			ut_a(ind_field->prefix_len <= len
-			     || dict_index_is_clust(index));
+			dfield_set_len(dfield, len);
 		}
 
-		len = dtype_get_at_most_n_mbchars(
-			col->prtype, col->mbminlen, col->mbmaxlen,
-			ind_field->prefix_len, len, dfield_get_data(dfield));
-		dfield_set_len(dfield, len);
+		/* If a column prefix index, take only the prefix. */
+		if (ind_field->prefix_len) {
+			len = dtype_get_at_most_n_mbchars(
+				col->prtype, col->mbminlen, col->mbmaxlen,
+				ind_field->prefix_len, len,
+				dfield_get_data(dfield));
+			dfield_set_len(dfield, len);
+		}
 	}
 
 	ut_ad(dtuple_check_typed(entry));
@@ -223,6 +233,7 @@ row_build(
 
 	ut_ad(index && rec && heap);
 	ut_ad(dict_index_is_clust(index));
+	ut_ad(!mutex_own(&kernel_mutex));
 
 	if (!offsets) {
 		offsets = rec_get_offsets(rec, index, offsets_,
@@ -231,6 +242,22 @@ row_build(
 		ut_ad(rec_offs_validate(rec, index, offsets));
 	}
 
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+	/* This condition can occur during crash recovery before
+	trx_rollback_active() has completed execution.
+
+	This condition is possible if the server crashed
+	during an insert or update before
+	btr_store_big_rec_extern_fields() did mtr_commit() all
+	BLOB pointers to the clustered index record.
+
+	If the record contains a null BLOB pointer, look up the
+	transaction that holds the implicit lock on this record, and
+	assert that it was recovered (and will soon be rolled back). */
+	ut_a(!rec_offs_any_null_extern(rec, offsets)
+	     || trx_assert_recovered(row_get_rec_trx_id(rec, index, offsets)));
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
+
 	if (type != ROW_COPY_POINTERS) {
 		/* Take a copy of rec to heap */
 		buf = mem_heap_alloc(heap, rec_offs_size(offsets));
@@ -431,6 +458,10 @@ row_rec_to_index_entry(
 		rec = rec_copy(buf, rec, offsets);
 		/* Avoid a debug assertion in rec_offs_validate(). */
 		rec_offs_make_valid(rec, index, offsets);
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+	} else {
+		ut_a(!rec_offs_any_null_extern(rec, offsets));
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
 	}
 
 	entry = row_rec_to_index_entry_low(rec, index, offsets, n_ext, heap);
diff --git a/storage/xtradb/row/row0sel.c b/storage/xtradb/row/row0sel.c
index 1dbd9e3a42d..4ae55b47b5b 100644
--- a/storage/xtradb/row/row0sel.c
+++ b/storage/xtradb/row/row0sel.c
@@ -2546,6 +2546,8 @@ row_sel_field_store_in_mysql_format(
 
 	ut_ad(len != UNIV_SQL_NULL);
 	UNIV_MEM_ASSERT_RW(data, len);
+	UNIV_MEM_ASSERT_W(dest, templ->mysql_col_len);
+	UNIV_MEM_INVALID(dest, templ->mysql_col_len);
 
 	switch (templ->type) {
 	case DATA_INT:
@@ -2582,14 +2584,16 @@ row_sel_field_store_in_mysql_format(
 
 			dest = row_mysql_store_true_var_len(
 				dest, len, templ->mysql_length_bytes);
+			/* Copy the actual data. Leave the rest of the
+			buffer uninitialized. */
+			memcpy(dest, data, len);
+			break;
 		}
 
 		/* Copy the actual data */
 		ut_memcpy(dest, data, len);
 
-		/* Pad with trailing spaces. We pad with spaces also the
-		unused end of a >= 5.0.3 true VARCHAR column, just in case
-		MySQL expects its contents to be deterministic. */
+		/* Pad with trailing spaces. */
 
 		pad_ptr = dest + len;
 
@@ -3155,6 +3159,39 @@ sel_restore_position_for_mysql(
 }
 
 /********************************************************************//**
+Copies a cached field for MySQL from the fetch cache. */
+static
+void
+row_sel_copy_cached_field_for_mysql(
+/*================================*/
+	byte*			buf,	/*!< in/out: row buffer */
+	const byte*		cache,	/*!< in: cached row */
+	const mysql_row_templ_t*templ)	/*!< in: column template */
+{
+	ulint	len;
+
+	buf += templ->mysql_col_offset;
+	cache += templ->mysql_col_offset;
+
+	UNIV_MEM_ASSERT_W(buf, templ->mysql_col_len);
+
+	if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR
+	    && templ->type != DATA_INT) {
+		/* Check for != DATA_INT to make sure we do
+		not treat MySQL ENUM or SET as a true VARCHAR!
+		Find the actual length of the true VARCHAR field. */
+		row_mysql_read_true_varchar(
+			&len, cache, templ->mysql_length_bytes);
+		len += templ->mysql_length_bytes;
+		UNIV_MEM_INVALID(buf, templ->mysql_col_len);
+	} else {
+		len = templ->mysql_col_len;
+	}
+
+	ut_memcpy(buf, cache, len);
+}
+
+/********************************************************************//**
 Pops a cached row for MySQL from the fetch cache. */
 UNIV_INLINE
 void
@@ -3166,26 +3203,22 @@ row_sel_pop_cached_row_for_mysql(
 {
 	ulint			i;
 	const mysql_row_templ_t*templ;
-	byte*			cached_rec;
+	const byte*		cached_rec;
 	ut_ad(prebuilt->n_fetch_cached > 0);
 	ut_ad(prebuilt->mysql_prefix_len <= prebuilt->mysql_row_len);
 
+	UNIV_MEM_ASSERT_W(buf, prebuilt->mysql_row_len);
+
+	cached_rec = prebuilt->fetch_cache[prebuilt->fetch_cache_first];
+
 	if (UNIV_UNLIKELY(prebuilt->keep_other_fields_on_keyread)) {
 		/* Copy cache record field by field, don't touch fields that
 		are not covered by current key */
-		cached_rec = prebuilt->fetch_cache[
-			prebuilt->fetch_cache_first];
 
 		for (i = 0; i < prebuilt->n_template; i++) {
 			templ = prebuilt->mysql_template + i;
-#if 0 /* Some of the cached_rec may legitimately be uninitialized. */
-			UNIV_MEM_ASSERT_RW(cached_rec
-					   + templ->mysql_col_offset,
-					   templ->mysql_col_len);
-#endif
-			ut_memcpy(buf + templ->mysql_col_offset,
-				  cached_rec + templ->mysql_col_offset,
-				  templ->mysql_col_len);
+			row_sel_copy_cached_field_for_mysql(
+				buf, cached_rec, templ);
 			/* Copy NULL bit of the current field from cached_rec
 			to buf */
 			if (templ->mysql_null_bit_mask) {
@@ -3199,17 +3232,24 @@ row_sel_pop_cached_row_for_mysql(
                                                templ->mysql_null_bit_mask;
 			}
 		}
+	} else if (prebuilt->mysql_prefix_len > 63) {
+		/* The record is long. Copy it field by field, in case
+		there are some long VARCHAR columns of which only a
+		small length is being used. */
+		UNIV_MEM_INVALID(buf, prebuilt->mysql_prefix_len);
+
+		/* First copy the NULL bits. */
+		ut_memcpy(buf, cached_rec, prebuilt->null_bitmap_len);
+		/* Then copy the requested fields. */
+
+		for (i = 0; i < prebuilt->n_template; i++) {
+			row_sel_copy_cached_field_for_mysql(
+				buf, cached_rec, prebuilt->mysql_template + i);
+		}
+	} else {
+		ut_memcpy(buf, cached_rec, prebuilt->mysql_prefix_len);
 	}
-	else {
-#if 0 /* Some of the cached_rec may legitimately be uninitialized. */
-		UNIV_MEM_ASSERT_RW(prebuilt->fetch_cache
-				   [prebuilt->fetch_cache_first],
-				   prebuilt->mysql_prefix_len);
-#endif
-		ut_memcpy(buf,
-			  prebuilt->fetch_cache[prebuilt->fetch_cache_first],
-			  prebuilt->mysql_prefix_len);
-	}
+
 	prebuilt->n_fetch_cached--;
 	prebuilt->fetch_cache_first++;
 
@@ -4086,7 +4126,13 @@ rec_loop:
 	if (UNIV_UNLIKELY(next_offs >= UNIV_PAGE_SIZE - PAGE_DIR)) {
 
 wrong_offs:
-		if (srv_force_recovery == 0 || moves_up == FALSE) {
+		if (srv_pass_corrupt_table && !trx_sys_sys_space(index->table->space)) {
+			index->table->is_corrupt = TRUE;
+			fil_space_set_corrupt(index->table->space);
+		}
+
+		if ((srv_force_recovery == 0 || moves_up == FALSE)
+		    && srv_pass_corrupt_table <= 1) {
 			ut_print_timestamp(stderr);
 			buf_page_print(page_align(rec), 0);
 			fprintf(stderr,
@@ -4137,7 +4183,8 @@ wrong_offs:
 
 	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
 
-	if (UNIV_UNLIKELY(srv_force_recovery > 0)) {
+	if (UNIV_UNLIKELY(srv_force_recovery > 0)
+	    || (srv_pass_corrupt_table == 2 && index->table->is_corrupt)) {
 		if (!rec_validate(rec, offsets)
 		    || !btr_index_rec_validate(rec, index, FALSE)) {
 			fprintf(stderr,
diff --git a/storage/xtradb/row/row0upd.c b/storage/xtradb/row/row0upd.c
index a6fb266c4ed..5765d3b76d2 100644
--- a/storage/xtradb/row/row0upd.c
+++ b/storage/xtradb/row/row0upd.c
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -504,14 +504,49 @@ row_upd_rec_in_place(
 	n_fields = upd_get_n_fields(update);
 
 	for (i = 0; i < n_fields; i++) {
+#ifdef UNIV_BLOB_DEBUG
+		btr_blob_dbg_t	b;
+		const byte*	field_ref	= NULL;
+#endif /* UNIV_BLOB_DEBUG */
+
 		upd_field = upd_get_nth_field(update, i);
 		new_val = &(upd_field->new_val);
 		ut_ad(!dfield_is_ext(new_val) ==
 		      !rec_offs_nth_extern(offsets, upd_field->field_no));
+#ifdef UNIV_BLOB_DEBUG
+		if (dfield_is_ext(new_val)) {
+			ulint	len;
+			field_ref = rec_get_nth_field(rec, offsets, i, &len);
+			ut_a(len != UNIV_SQL_NULL);
+			ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
+			field_ref += len - BTR_EXTERN_FIELD_REF_SIZE;
+
+			b.ref_page_no = page_get_page_no(page_align(rec));
+			b.ref_heap_no = page_rec_get_heap_no(rec);
+			b.ref_field_no = i;
+			b.blob_page_no = mach_read_from_4(
+				field_ref + BTR_EXTERN_PAGE_NO);
+			ut_a(b.ref_field_no >= index->n_uniq);
+			btr_blob_dbg_rbt_delete(index, &b, "upd_in_place");
+		}
+#endif /* UNIV_BLOB_DEBUG */
 
 		rec_set_nth_field(rec, offsets, upd_field->field_no,
 				  dfield_get_data(new_val),
 				  dfield_get_len(new_val));
+
+#ifdef UNIV_BLOB_DEBUG
+		if (dfield_is_ext(new_val)) {
+			b.blob_page_no = mach_read_from_4(
+				field_ref + BTR_EXTERN_PAGE_NO);
+			b.always_owner = b.owner = !(field_ref[BTR_EXTERN_LEN]
+						     & BTR_EXTERN_OWNER_FLAG);
+			b.del = rec_get_deleted_flag(
+				rec, rec_offs_comp(offsets));
+
+			btr_blob_dbg_rbt_insert(index, &b, "upd_in_place");
+		}
+#endif /* UNIV_BLOB_DEBUG */
 	}
 
 	if (UNIV_LIKELY_NULL(page_zip)) {
@@ -1556,8 +1591,9 @@ row_upd_sec_index_entry(
 
 	mtr_start(&mtr);
 
-	found = row_search_index_entry(index, entry, BTR_MODIFY_LEAF, &pcur,
-				       &mtr);
+	found = row_search_index_entry(index, entry,
+				       trx->fake_changes ? BTR_SEARCH_LEAF : BTR_MODIFY_LEAF,
+				       &pcur, &mtr);
 	btr_cur = btr_pcur_get_btr_cur(&pcur);
 
 	rec = btr_cur_get_rec(btr_cur);
@@ -1787,9 +1823,11 @@ row_upd_clust_rec_by_insert(
 		the previous invocation of this function. Mark the
 		off-page columns in the entry inherited. */
 
+		if (!(trx->fake_changes)) {
 		change_ownership = row_upd_clust_rec_by_insert_inherit(
 			NULL, NULL, entry, node->update);
 		ut_a(change_ownership);
+		}
 		/* fall through */
 	case UPD_NODE_INSERT_CLUSTERED:
 		/* A lock wait occurred in row_ins_index_entry() in
@@ -1819,7 +1857,7 @@ err_exit:
 		delete-marked old record, mark them disowned by the
 		old record and owned by the new entry. */
 
-		if (rec_offs_any_extern(offsets)) {
+		if (rec_offs_any_extern(offsets) && !(trx->fake_changes)) {
 			change_ownership = row_upd_clust_rec_by_insert_inherit(
 				rec, offsets, entry, node->update);
 
@@ -1947,33 +1985,50 @@ row_upd_clust_rec(
 	the same transaction do not modify the record in the meantime.
 	Therefore we can assert that the restoration of the cursor succeeds. */
 
-	ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr));
+	ut_a(btr_pcur_restore_position(thr_get_trx(thr)->fake_changes ? BTR_SEARCH_LEAF : BTR_MODIFY_TREE,
+				       pcur, mtr));
 
 	ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur),
 				    dict_table_is_comp(index->table)));
 
-	err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG, btr_cur,
-					 &heap, &big_rec, node->update,
-					 node->cmpl_info, thr, mtr);
-	mtr_commit(mtr);
-
-	if (err == DB_SUCCESS && big_rec) {
+	err = btr_cur_pessimistic_update(
+		BTR_NO_LOCKING_FLAG | BTR_KEEP_POS_FLAG, btr_cur,
+		&heap, &big_rec, node->update, node->cmpl_info, thr, mtr);
+	/* skip store extern for fake_changes */
+	if (big_rec && !(thr_get_trx(thr)->fake_changes)) {
 		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 		rec_t*		rec;
 		rec_offs_init(offsets_);
 
-		mtr_start(mtr);
+		ut_a(err == DB_SUCCESS);
+		/* Write out the externally stored columns while still
+		x-latching index->lock and block->lock. We have to
+		mtr_commit(mtr) first, so that the redo log will be
+		written in the correct order. Otherwise, we would run
+		into trouble on crash recovery if mtr freed B-tree
+		pages on which some of the big_rec fields will be
+		written. */
+		btr_cur_mtr_commit_and_start(btr_cur, mtr);
 
-		ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr));
 		rec = btr_cur_get_rec(btr_cur);
 		err = btr_store_big_rec_extern_fields(
 			index, btr_cur_get_block(btr_cur), rec,
 			rec_get_offsets(rec, index, offsets_,
 					ULINT_UNDEFINED, &heap),
 			mtr, TRUE, big_rec);
-		mtr_commit(mtr);
+		/* If writing big_rec fails (for example, because of
+		DB_OUT_OF_FILE_SPACE), the record will be corrupted.
+		Even if we did not update any externally stored
+		columns, our update could cause the record to grow so
+		that a non-updated column was selected for external
+		storage. This non-update would not have been written
+		to the undo log, and thus the record cannot be rolled
+		back. */
+		ut_a(err == DB_SUCCESS);
 	}
 
+	mtr_commit(mtr);
+
 	if (UNIV_LIKELY_NULL(heap)) {
 		mem_heap_free(heap);
 	}
@@ -2082,7 +2137,8 @@ row_upd_clust_step(
 
 	ut_a(pcur->rel_pos == BTR_PCUR_ON);
 
-	success = btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);
+	success = btr_pcur_restore_position(thr_get_trx(thr)->fake_changes ? BTR_SEARCH_LEAF : BTR_MODIFY_LEAF,
+					    pcur, mtr);
 
 	if (!success) {
 		err = DB_RECORD_NOT_FOUND;
diff --git a/storage/xtradb/row/row0vers.c b/storage/xtradb/row/row0vers.c
index d4fde0b939b..8a7bb842293 100644
--- a/storage/xtradb/row/row0vers.c
+++ b/storage/xtradb/row/row0vers.c
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -550,6 +550,11 @@ row_vers_build_for_consistent_read(
 				/* The view already sees this version: we can
 				copy it to in_heap and return */
 
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+				ut_a(!rec_offs_any_null_extern(
+					     version, *offsets));
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
+
 				buf = mem_heap_alloc(in_heap,
 						     rec_offs_size(*offsets));
 				*old_vers = rec_copy(buf, version, *offsets);
@@ -583,6 +588,10 @@ row_vers_build_for_consistent_read(
 		*offsets = rec_get_offsets(prev_version, index, *offsets,
 					   ULINT_UNDEFINED, offset_heap);
 
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+		ut_a(!rec_offs_any_null_extern(prev_version, *offsets));
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
+
 		trx_id = row_get_rec_trx_id(prev_version, index, *offsets);
 
 		if (read_view_sees_trx_id(view, trx_id)) {
@@ -682,6 +691,10 @@ row_vers_build_for_semi_consistent_read(
 			/* We found a version that belongs to a
 			committed transaction: return it. */
 
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+			ut_a(!rec_offs_any_null_extern(version, *offsets));
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
+
 			if (rec == version) {
 				*old_vers = rec;
 				err = DB_SUCCESS;
@@ -739,6 +752,9 @@ row_vers_build_for_semi_consistent_read(
 		version = prev_version;
 		*offsets = rec_get_offsets(version, index, *offsets,
 					   ULINT_UNDEFINED, offset_heap);
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+		ut_a(!rec_offs_any_null_extern(version, *offsets));
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
 	}/* for (;;) */
 
 	if (heap) {
diff --git a/storage/xtradb/srv/srv0srv.c b/storage/xtradb/srv/srv0srv.c
index 9a142e1ca86..9d74c293405 100644
--- a/storage/xtradb/srv/srv0srv.c
+++ b/storage/xtradb/srv/srv0srv.c
@@ -86,6 +86,11 @@ Created 10/8/1995 Heikki Tuuri
 #include "trx0i_s.h"
 #include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
 
+/* prototypes of new functions added to ha_innodb.cc for kill_idle_transaction */
+ibool		innobase_thd_is_idle(const void* thd);
+ib_int64_t	innobase_thd_get_start_time(const void* thd);
+void		innobase_thd_kill(void* thd);
+
 /* prototypes for new functions added to ha_innodb.cc */
 ibool	innobase_get_slow_log();
 
@@ -100,6 +105,9 @@ UNIV_INTERN ulint	srv_activity_count	= 0;
 /* The following is the maximum allowed duration of a lock wait. */
 UNIV_INTERN ulint	srv_fatal_semaphore_wait_threshold = 600;
 
+/* Kill sessions whose trx is idle longer than this (seconds); 0 = off */
+UNIV_INTERN long long	srv_kill_idle_transaction = 0;
+
 /* How much data manipulation language (DML) statements need to be delayed,
 in microseconds, in order to reduce the lagging of the purge thread. */
 UNIV_INTERN ulint	srv_dml_needed_delay = 0;
@@ -225,17 +233,15 @@ UNIV_INTERN ulint	srv_buf_pool_curr_size	= 0;
 UNIV_INTERN ulint	srv_mem_pool_size	= ULINT_MAX;
 UNIV_INTERN ulint	srv_lock_table_size	= ULINT_MAX;
 
-/* key value for shm */
-UNIV_INTERN uint	srv_buffer_pool_shm_key	= 0;
-UNIV_INTERN ibool	srv_buffer_pool_shm_is_reused = FALSE;
-UNIV_INTERN ibool	srv_buffer_pool_shm_checksum = TRUE;
-
 /* This parameter is deprecated. Use srv_n_io_[read|write]_threads
 instead. */
 UNIV_INTERN ulint	srv_n_file_io_threads	= ULINT_MAX;
 UNIV_INTERN ulint	srv_n_read_io_threads	= ULINT_MAX;
 UNIV_INTERN ulint	srv_n_write_io_threads	= ULINT_MAX;
 
+/* Switch to enable random read ahead. */
+UNIV_INTERN my_bool	srv_random_read_ahead	= FALSE;
+
 /* The universal page size of the database */
 UNIV_INTERN ulint	srv_page_size_shift	= 0;
 UNIV_INTERN ulint	srv_page_size		= 0;
@@ -334,6 +340,9 @@ UNIV_INTERN ulint srv_buf_pool_reads = 0;
 /** Time in seconds between automatic buffer pool dumps */
 UNIV_INTERN uint srv_auto_lru_dump = 0;
 
+/** Whether startup should be blocked until buffer pool is fully restored */
+UNIV_INTERN ibool srv_blocking_lru_restore;
+
 /* structure to pass status variables to MySQL */
 UNIV_INTERN export_struc export_vars;
 
@@ -2167,6 +2176,8 @@ srv_export_innodb_status(void)
 	export_vars.innodb_buffer_pool_wait_free = srv_buf_pool_wait_free;
 	export_vars.innodb_buffer_pool_pages_flushed = srv_buf_pool_flushed;
 	export_vars.innodb_buffer_pool_reads = srv_buf_pool_reads;
+	export_vars.innodb_buffer_pool_read_ahead_rnd
+		= buf_pool->stat.n_ra_pages_read_rnd;
 	export_vars.innodb_buffer_pool_read_ahead
 		= buf_pool->stat.n_ra_pages_read;
 	export_vars.innodb_buffer_pool_read_ahead_evicted
@@ -2499,6 +2510,12 @@ srv_error_monitor_thread(
 	ulint		fatal_cnt	= 0;
 	ib_uint64_t	old_lsn;
 	ib_uint64_t	new_lsn;
+	/* longest waiting thread for a semaphore */
+	os_thread_id_t	waiter		= os_thread_get_curr_id();
+	os_thread_id_t	old_waiter	= waiter;
+	/* the semaphore that is being waited for */
+	const void*	sema		= NULL;
+	const void*	old_sema	= NULL;
 
 	old_lsn = srv_start_lsn;
 
@@ -2547,7 +2564,8 @@ loop:
 
 	sync_arr_wake_threads_if_sema_free();
 
-	if (sync_array_print_long_waits()) {
+	if (sync_array_print_long_waits(&waiter, &sema)
+	    && sema == old_sema && os_thread_eq(waiter, old_waiter)) {
 		fatal_cnt++;
 		if (fatal_cnt > 10) {
 
@@ -2562,6 +2580,38 @@ loop:
 		}
 	} else {
 		fatal_cnt = 0;
+		old_waiter = waiter;
+		old_sema = sema;
+	}
+
+	if (srv_kill_idle_transaction && trx_sys) {
+		trx_t*	trx;
+		time_t	now;
+rescan_idle:
+		now = time(NULL);
+		mutex_enter(&kernel_mutex);
+		trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
+		while (trx) {
+			if (trx->conc_state == TRX_ACTIVE
+			    && trx->mysql_thd
+			    && innobase_thd_is_idle(trx->mysql_thd)) {
+				ib_int64_t	start_time; /* as stmt ID */
+
+				start_time = innobase_thd_get_start_time(trx->mysql_thd);
+				if (trx->last_stmt_start != start_time) {
+					trx->idle_start = now;
+					trx->last_stmt_start = start_time;
+				} else if (difftime(now, trx->idle_start)
+					   > srv_kill_idle_transaction) {
+					/* kill the session */
+					mutex_exit(&kernel_mutex);
+					innobase_thd_kill(trx->mysql_thd);
+					goto rescan_idle;
+				}
+			}
+			trx = UT_LIST_GET_NEXT(mysql_trx_list, trx);
+		}
+		mutex_exit(&kernel_mutex);
 	}
 
 	/* Flush stderr so that a database user gets the output
@@ -2607,7 +2657,9 @@ srv_LRU_dump_restore_thread(
 		os_thread_pf(os_thread_get_curr_id()));
 #endif
 
-	if (srv_auto_lru_dump)
+	/* If srv_blocking_lru_restore is TRUE, restore will be done
+	synchronously on startup. */
+	if (srv_auto_lru_dump && !srv_blocking_lru_restore)
 		buf_LRU_file_restore();
 
 	last_dump_time = time(NULL);
diff --git a/storage/xtradb/srv/srv0start.c b/storage/xtradb/srv/srv0start.c
index d002a1bb682..2bb122ea91c 100644
--- a/storage/xtradb/srv/srv0start.c
+++ b/storage/xtradb/srv/srv0start.c
@@ -88,6 +88,7 @@ Created 2/16/1996 Heikki Tuuri
 # include "thr0loc.h"
 # include "os0sync.h" /* for INNODB_RW_LOCKS_USE_ATOMICS */
 # include "zlib.h" /* for ZLIB_VERSION */
+# include "buf0lru.h" /* for buf_LRU_file_restore() */
 
 /** Log sequence number immediately after startup */
 UNIV_INTERN ib_uint64_t	srv_start_lsn;
@@ -126,9 +127,9 @@ static mutex_t		ios_mutex;
 static ulint		ios;
 
 /** io_handler_thread parameters for thread identification */
-static ulint		n[SRV_MAX_N_IO_THREADS + 7 + 64];
+static ulint		n[SRV_MAX_N_IO_THREADS + 7 + UNIV_MAX_PARALLELISM];
 /** io_handler_thread identifiers */
-static os_thread_id_t	thread_ids[SRV_MAX_N_IO_THREADS + 7 + 64];
+static os_thread_id_t	thread_ids[SRV_MAX_N_IO_THREADS + 7 + UNIV_MAX_PARALLELISM];
 
 /** We use this mutex to test the return value of pthread_mutex_trylock
    on successful locking. HP-UX does NOT return 0, though Linux et al do. */
@@ -1200,6 +1201,12 @@ innobase_start_or_create_for_mysql(void)
 		);
 #endif
 
+#ifdef UNIV_BLOB_DEBUG
+	fprintf(stderr,
+		"InnoDB: !!!!!!!! UNIV_BLOB_DEBUG switched on !!!!!!!!!\n"
+		"InnoDB: Server restart may fail with UNIV_BLOB_DEBUG\n");
+#endif /* UNIV_BLOB_DEBUG */
+
 #ifdef UNIV_SYNC_DEBUG
 	fprintf(stderr,
 		"InnoDB: !!!!!!!! UNIV_SYNC_DEBUG switched on !!!!!!!!!\n");
@@ -1741,8 +1748,6 @@ innobase_start_or_create_for_mysql(void)
 		Note that this is not as heavy weight as it seems. At
 		this point there will be only ONE page in the buf_LRU
 		and there must be no page in the buf_flush list. */
-		/* buffer_pool_shm should not be reused when recovery was needed. */
-		if (!srv_buffer_pool_shm_is_reused)
 		buf_pool_invalidate();
 
 		/* We always try to do a recovery, even if the database had
@@ -1872,6 +1877,11 @@ innobase_start_or_create_for_mysql(void)
 	os_thread_create(&srv_LRU_dump_restore_thread, NULL,
 			 thread_ids + 5 + SRV_MAX_N_IO_THREADS);
 
+	/* If srv_blocking_lru_restore is TRUE, load buffer pool contents
+	synchronously */
+	if (srv_auto_lru_dump && srv_blocking_lru_restore)
+		buf_LRU_file_restore();
+
 	srv_is_being_started = FALSE;
 
 	if (trx_doublewrite == NULL) {
diff --git a/storage/xtradb/sync/sync0arr.c b/storage/xtradb/sync/sync0arr.c
index 57a288089c7..4e788b4a968 100644
--- a/storage/xtradb/sync/sync0arr.c
+++ b/storage/xtradb/sync/sync0arr.c
@@ -913,8 +913,10 @@ Prints warnings of long semaphore waits to stderr.
 @return	TRUE if fatal semaphore wait threshold was exceeded */
 UNIV_INTERN
 ibool
-sync_array_print_long_waits(void)
-/*=============================*/
+sync_array_print_long_waits(
+/*========================*/
+	os_thread_id_t*	waiter,	/*!< out: longest waiting thread */
+	const void**	sema)	/*!< out: longest-waited-for semaphore */
 {
 	sync_cell_t*	cell;
 	ibool		old_val;
@@ -922,24 +924,40 @@ sync_array_print_long_waits(void)
 	ulint		i;
 	ulint		fatal_timeout = srv_fatal_semaphore_wait_threshold;
 	ibool		fatal = FALSE;
+	double		longest_diff = 0;
 
 	for (i = 0; i < sync_primary_wait_array->n_cells; i++) {
 
+		double	diff;
+		void*	wait_object;
+
 		cell = sync_array_get_nth_cell(sync_primary_wait_array, i);
 
-		if (cell->wait_object != NULL && cell->waiting
-		    && difftime(time(NULL), cell->reservation_time) > 240) {
+		wait_object = cell->wait_object;
+
+		if (wait_object == NULL || !cell->waiting) {
+
+			continue;
+		}
+
+		diff = difftime(time(NULL), cell->reservation_time);
+
+		if (diff > 240) {
 			fputs("InnoDB: Warning: a long semaphore wait:\n",
 			      stderr);
 			sync_array_cell_print(stderr, cell);
 			noticed = TRUE;
 		}
 
-		if (cell->wait_object != NULL && cell->waiting
-		    && difftime(time(NULL), cell->reservation_time)
-		    > fatal_timeout) {
+		if (diff > fatal_timeout) {
 			fatal = TRUE;
 		}
+
+		if (diff > longest_diff) {
+			longest_diff = diff;
+			*sema = wait_object;
+			*waiter = cell->thread;
+		}
 	}
 
 	if (noticed) {
diff --git a/storage/xtradb/sync/sync0rw.c b/storage/xtradb/sync/sync0rw.c
index 9431de15fda..fe000e7d008 100644
--- a/storage/xtradb/sync/sync0rw.c
+++ b/storage/xtradb/sync/sync0rw.c
@@ -261,6 +261,9 @@ rw_lock_create_func(
 	contains garbage at initialization and cannot be used for
 	recursive x-locking. */
 	lock->recursive = FALSE;
+	/* Silence Valgrind when UNIV_DEBUG_VALGRIND is not enabled. */
+	memset((void*) &lock->writer_thread, 0, sizeof lock->writer_thread);
+	UNIV_MEM_INVALID(&lock->writer_thread, sizeof lock->writer_thread);
 
 #ifdef UNIV_SYNC_DEBUG
 	UT_LIST_INIT(lock->debug_list);
@@ -762,7 +765,9 @@ rw_lock_add_debug_info(
 	rw_lock_debug_mutex_exit();
 
 	if ((pass == 0) && (lock_type != RW_LOCK_WAIT_EX)) {
-		sync_thread_add_level(lock, lock->level);
+		sync_thread_add_level(lock, lock->level,
+				      lock_type == RW_LOCK_EX
+				      && lock->lock_word < 0);
 	}
 }
 
diff --git a/storage/xtradb/sync/sync0sync.c b/storage/xtradb/sync/sync0sync.c
index 3a80da9318b..277a53e4fb2 100644
--- a/storage/xtradb/sync/sync0sync.c
+++ b/storage/xtradb/sync/sync0sync.c
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2008, Google Inc.
 
 Portions of this file contain modifications contributed and copyrighted by
@@ -668,7 +668,7 @@ mutex_set_debug_info(
 	ut_ad(mutex);
 	ut_ad(file_name);
 
-	sync_thread_add_level(mutex, mutex->level);
+	sync_thread_add_level(mutex, mutex->level, FALSE);
 
 	mutex->file_name = file_name;
 	mutex->line	 = line;
@@ -1094,8 +1094,9 @@ void
 sync_thread_add_level(
 /*==================*/
 	void*	latch,	/*!< in: pointer to a mutex or an rw-lock */
-	ulint	level)	/*!< in: level in the latching order; if
+	ulint	level,	/*!< in: level in the latching order; if
 			SYNC_LEVEL_VARYING, nothing is done */
+	ibool	relock)	/*!< in: TRUE if re-entering an x-lock */
 {
 	sync_level_t*	array;
 	sync_level_t*	slot;
@@ -1143,6 +1144,10 @@ sync_thread_add_level(
 
 	array = thread_slot->levels;
 
+	if (relock) {
+		goto levels_ok;
+	}
+
 	/* NOTE that there is a problem with _NODE and _LEAF levels: if the
 	B-tree height changes, then a leaf can change to an internal node
 	or the other way around. We do not know at present if this can cause
@@ -1287,6 +1292,7 @@ sync_thread_add_level(
 		ut_error;
 	}
 
+levels_ok:
 	for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
 
 		slot = sync_thread_levels_get_nth(array, i);
diff --git a/storage/xtradb/trx/trx0i_s.c b/storage/xtradb/trx/trx0i_s.c
index e148234888b..5cc9df2d5c4 100644
--- a/storage/xtradb/trx/trx0i_s.c
+++ b/storage/xtradb/trx/trx0i_s.c
@@ -504,7 +504,7 @@ fill_trx_row(
 		query[stmt_len] = '\0';
 
 		row->trx_query = ha_storage_put_memlim(
-			cache->storage, stmt, stmt_len + 1,
+			cache->storage, query, stmt_len + 1,
 			MAX_ALLOWED_FOR_STORAGE(cache));
 
 		row->trx_query_cs = innobase_get_charset(trx->mysql_thd);
diff --git a/storage/xtradb/trx/trx0rec.c b/storage/xtradb/trx/trx0rec.c
index 71629f01d73..a7a393d31c8 100644
--- a/storage/xtradb/trx/trx0rec.c
+++ b/storage/xtradb/trx/trx0rec.c
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -1590,6 +1590,10 @@ trx_undo_prev_version_build(
 		return(DB_ERROR);
 	}
 
+# if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+	ut_a(!rec_offs_any_null_extern(rec, offsets));
+# endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
+
 	if (row_upd_changes_field_size_or_external(index, offsets, update)) {
 		ulint	n_ext;
 
diff --git a/storage/xtradb/trx/trx0sys.c b/storage/xtradb/trx/trx0sys.c
index a9fb5bb38d8..b5bec64ee37 100644
--- a/storage/xtradb/trx/trx0sys.c
+++ b/storage/xtradb/trx/trx0sys.c
@@ -37,6 +37,7 @@ Created 3/26/1996 Heikki Tuuri
 #include "trx0rseg.h"
 #include "trx0undo.h"
 #include "srv0srv.h"
+#include "srv0start.h"
 #include "trx0purge.h"
 #include "log0log.h"
 #include "log0recv.h"
@@ -1872,10 +1873,12 @@ void
 trx_sys_close(void)
 /*===============*/
 {
+	trx_t*		trx;
 	trx_rseg_t*	rseg;
 	read_view_t*	view;
 
 	ut_ad(trx_sys != NULL);
+	ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS);
 
 	/* Check that all read views are closed except read view owned
 	by a purge. */
@@ -1907,6 +1910,13 @@ trx_sys_close(void)
 	mem_free(trx_doublewrite);
 	trx_doublewrite = NULL;
 
+	/* Only prepared transactions may be left in the system. Free them. */
+	ut_a(UT_LIST_GET_LEN(trx_sys->trx_list) == trx_n_prepared);
+
+	while ((trx = UT_LIST_GET_FIRST(trx_sys->trx_list)) != NULL) {
+		trx_free_prepared(trx);
+	}
+
 	/* There can't be any active transactions. */
 	rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
 
diff --git a/storage/xtradb/trx/trx0trx.c b/storage/xtradb/trx/trx0trx.c
index 7ea3e09036f..1fca58d278f 100644
--- a/storage/xtradb/trx/trx0trx.c
+++ b/storage/xtradb/trx/trx0trx.c
@@ -50,6 +50,9 @@ UNIV_INTERN sess_t*		trx_dummy_sess = NULL;
 /** Number of transactions currently allocated for MySQL: protected by
 the kernel mutex */
 UNIV_INTERN ulint	trx_n_mysql_transactions = 0;
+/** Number of transactions currently in the XA PREPARED state: protected by
+the kernel mutex */
+UNIV_INTERN ulint	trx_n_prepared = 0;
 
 /*************************************************************//**
 Set detailed error message for the transaction. */
@@ -111,6 +114,8 @@ trx_create(
 
 	trx->flush_log_at_trx_commit_session = 3; /* means to use innodb_flush_log_at_trx_commit value */
 
+	trx->fake_changes = FALSE;
+
 	trx->check_foreigns = TRUE;
 	trx->check_unique_secondary = TRUE;
 
@@ -134,6 +139,9 @@ trx_create(
 	trx->mysql_relay_log_file_name = "";
 	trx->mysql_relay_log_pos = 0;
 
+	trx->idle_start = 0;
+	trx->last_stmt_start = 0;
+
 	mutex_create(&trx->undo_mutex, SYNC_TRX_UNDO);
 
 	trx->rseg = NULL;
@@ -354,6 +362,60 @@ trx_free(
 }
 
 /********************************************************************//**
+At shutdown, releases the locks of a PREPARED trx and frees the object. */
+UNIV_INTERN
+void
+trx_free_prepared(
+/*==============*/
+	trx_t*	trx)	/*!< in, own: PREPARED trx object; freed here */
+{
+	ut_ad(mutex_own(&kernel_mutex));
+	ut_a(trx->conc_state == TRX_PREPARED);
+	ut_a(trx->magic_n == TRX_MAGIC_N);
+
+	/* Prepared transactions are sort of active; they allow
+	ROLLBACK and COMMIT operations. Because the system does not
+	contain any other transactions than prepared transactions at
+	the shutdown stage and because a transaction cannot become
+	PREPARED while holding locks, it is safe to release the locks
+	held by PREPARED transactions here at shutdown. */
+	lock_release_off_kernel(trx);
+	/* Unlink and free the undo log objects of the trx. */
+	trx_undo_free_prepared(trx);
+
+	mutex_free(&trx->undo_mutex);
+
+	if (trx->undo_no_arr) {
+		trx_undo_arr_free(trx->undo_no_arr);
+	}
+	/* Sanity checks: no signals, waits or latches may remain. */
+	ut_a(UT_LIST_GET_LEN(trx->signals) == 0);
+	ut_a(UT_LIST_GET_LEN(trx->reply_signals) == 0);
+
+	ut_a(trx->wait_lock == NULL);
+	ut_a(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
+
+	ut_a(!trx->has_search_latch);
+
+	ut_a(trx->dict_operation_lock_mode == 0);
+	/* Free the heaps owned by the trx, if they were created. */
+	if (trx->lock_heap) {
+		mem_heap_free(trx->lock_heap);
+	}
+
+	if (trx->global_read_view_heap) {
+		mem_heap_free(trx->global_read_view_heap);
+	}
+
+	ut_a(ib_vector_is_empty(trx->autoinc_locks));
+	ib_vector_free(trx->autoinc_locks);
+	/* Unlink the trx from trx_sys->trx_list before freeing it. */
+	UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
+
+	mem_free(trx);
+}
+
+/********************************************************************//**
 Frees a transaction object for MySQL. */
 UNIV_INTERN
 void
@@ -495,6 +557,7 @@ trx_lists_init_at_db_start(void)
 					if (srv_force_recovery == 0) {
 
 						trx->conc_state = TRX_PREPARED;
+						trx_n_prepared++;
 					} else {
 						fprintf(stderr,
 							"InnoDB: Since"
@@ -573,6 +636,7 @@ trx_lists_init_at_db_start(void)
 
 							trx->conc_state
 								= TRX_PREPARED;
+							trx_n_prepared++;
 						} else {
 							fprintf(stderr,
 								"InnoDB: Since"
@@ -878,6 +942,11 @@ trx_commit_off_kernel(
 	      || trx->conc_state == TRX_PREPARED);
 	ut_ad(mutex_own(&kernel_mutex));
 
+	if (UNIV_UNLIKELY(trx->conc_state == TRX_PREPARED)) {
+		ut_a(trx_n_prepared > 0);
+		trx_n_prepared--;
+	}
+
 	/* The following assignment makes the transaction committed in memory
 	and makes its changes to data visible to other transactions.
 	NOTE that there is a small discrepancy from the strict formal
@@ -1945,6 +2014,7 @@ trx_prepare_off_kernel(
 
 	/*--------------------------------------*/
 	trx->conc_state = TRX_PREPARED;
+	trx_n_prepared++;
 	/*--------------------------------------*/
 
 	if (lsn) {
@@ -2127,10 +2197,11 @@ trx_get_trx_by_xid(
 	while (trx) {
 		/* Compare two X/Open XA transaction id's: their
 		length should be the same and binary comparison
-		of gtrid_lenght+bqual_length bytes should be
+		of gtrid_length+bqual_length bytes should be
 		the same */
 
-		if (trx->conc_state == TRX_PREPARED
+		if (trx->is_recovered
+		    && trx->conc_state == TRX_PREPARED
 		    && xid->gtrid_length == trx->xid.gtrid_length
 		    && xid->bqual_length == trx->xid.bqual_length
 		    && memcmp(xid->data, trx->xid.data,
diff --git a/storage/xtradb/trx/trx0undo.c b/storage/xtradb/trx/trx0undo.c
index 9ed83b5d5c1..ec1cd2d2c43 100644
--- a/storage/xtradb/trx/trx0undo.c
+++ b/storage/xtradb/trx/trx0undo.c
@@ -36,6 +36,7 @@ Created 3/26/1996 Heikki Tuuri
 #include "trx0rseg.h"
 #include "trx0trx.h"
 #include "srv0srv.h"
+#include "srv0start.h"
 #include "trx0rec.h"
 #include "trx0purge.h"
 
@@ -2014,4 +2015,28 @@ trx_undo_insert_cleanup(
 
 	mutex_exit(&(rseg->mutex));
 }
+
+/********************************************************************//**
+At shutdown, unlinks and frees the undo log objects of a PREPARED trx. */
+UNIV_INTERN
+void
+trx_undo_free_prepared(
+/*===================*/
+	trx_t*	trx)	/*!< in/out: PREPARED transaction */
+{
+	ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS);
+	/* NOTE: the freed undo pointers in trx are not reset to NULL. */
+	if (trx->update_undo) {
+		ut_a(trx->update_undo->state == TRX_UNDO_PREPARED);
+		UT_LIST_REMOVE(undo_list, trx->rseg->update_undo_list,
+			       trx->update_undo);
+		trx_undo_mem_free(trx->update_undo);
+	}
+	if (trx->insert_undo) {
+		ut_a(trx->insert_undo->state == TRX_UNDO_PREPARED);
+		UT_LIST_REMOVE(undo_list, trx->rseg->insert_undo_list,
+			       trx->insert_undo);
+		trx_undo_mem_free(trx->insert_undo);
+	}
+}
 #endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/ut/ut0mem.c b/storage/xtradb/ut/ut0mem.c
index bf55e4273b6..95fb2187b79 100644
--- a/storage/xtradb/ut/ut0mem.c
+++ b/storage/xtradb/ut/ut0mem.c
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -489,53 +489,6 @@ ut_strlcpy_rev(
 	return(src_size);
 }
 
-/**********************************************************************//**
-Make a quoted copy of a NUL-terminated string.	Leading and trailing
-quotes will not be included; only embedded quotes will be escaped.
-See also ut_strlenq() and ut_memcpyq().
-@return	pointer to end of dest */
-UNIV_INTERN
-char*
-ut_strcpyq(
-/*=======*/
-	char*		dest,	/*!< in: output buffer */
-	char		q,	/*!< in: the quote character */
-	const char*	src)	/*!< in: null-terminated string */
-{
-	while (*src) {
-		if ((*dest++ = *src++) == q) {
-			*dest++ = q;
-		}
-	}
-
-	return(dest);
-}
-
-/**********************************************************************//**
-Make a quoted copy of a fixed-length string.  Leading and trailing
-quotes will not be included; only embedded quotes will be escaped.
-See also ut_strlenq() and ut_strcpyq().
-@return	pointer to end of dest */
-UNIV_INTERN
-char*
-ut_memcpyq(
-/*=======*/
-	char*		dest,	/*!< in: output buffer */
-	char		q,	/*!< in: the quote character */
-	const char*	src,	/*!< in: string to be quoted */
-	ulint		len)	/*!< in: length of src */
-{
-	const char*	srcend = src + len;
-
-	while (src < srcend) {
-		if ((*dest++ = *src++) == q) {
-			*dest++ = q;
-		}
-	}
-
-	return(dest);
-}
-
 #ifndef UNIV_HOTBACKUP
 /**********************************************************************//**
 Return the number of times s2 occurs in s1. Overlapping instances of s2
author	Michael Widenius <monty@askmonty.org>	2011-12-11 11:34:44 +0200
committer	Michael Widenius <monty@askmonty.org>	2011-12-11 11:34:44 +0200
commit	6d4224a31c9d32c8f8067a4f7d16daa29bcdee6b (patch)
tree	79e3143528495069ad232f673532573b30afe425 /storage/xtradb
parent	3e2cb35e11cb5ee6668d538a62a3b32e017944a5 (diff)
parent	701c0f822abe4ee9eeafd244fa30dc2fcf067b81 (diff)
download	mariadb-git-6d4224a31c9d32c8f8067a4f7d16daa29bcdee6b.tar.gz