Diffstat (limited to 'storage/innobase/buf/buf0lru.cc')
-rw-r--r-- | storage/innobase/buf/buf0lru.cc | 996
1 file changed, 501 insertions, 495 deletions
diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc index 30b991d24cf..1cb46aecf22 100644 --- a/storage/innobase/buf/buf0lru.cc +++ b/storage/innobase/buf/buf0lru.cc @@ -24,19 +24,16 @@ Created 11/5/1995 Heikki Tuuri *******************************************************/ #include "buf0lru.h" - -#ifndef UNIV_HOTBACKUP #ifdef UNIV_NONINL #include "buf0lru.ic" -#endif +#endif /* UNIV_NOINL */ +#ifndef UNIV_HOTBACKUP #include "ut0byte.h" -#include "ut0lst.h" #include "ut0rnd.h" -#include "sync0sync.h" #include "sync0rw.h" #include "hash0hash.h" -#include "os0sync.h" +#include "os0event.h" #include "fil0fil.h" #include "btr0btr.h" #include "buf0buddy.h" @@ -53,8 +50,6 @@ Created 11/5/1995 Heikki Tuuri #include "srv0mon.h" #include "lock0lock.h" -#include "ha_prototypes.h" - /** The number of blocks from the LRU_old pointer onward, including the block pointed to, must be buf_pool->LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV of the whole LRU list length, except that the tolerance defined below @@ -62,7 +57,7 @@ is allowed. Note that the tolerance must be small enough such that for even the BUF_LRU_OLD_MIN_LEN long LRU list, the LRU_old pointer is not allowed to point to either end of the LRU list. */ -#define BUF_LRU_OLD_TOLERANCE 20 +static const ulint BUF_LRU_OLD_TOLERANCE = 20; /** The minimum amount of non-old blocks when the LRU_old list exists (that is, when there are more than BUF_LRU_OLD_MIN_LEN blocks). @@ -79,7 +74,11 @@ We also release buf_pool->mutex after scanning this many pages of the flush_list when dropping a table. This is to ensure that other threads are not blocked for extended period of time when using very large buffer pools. */ -#define BUF_LRU_DROP_SEARCH_SIZE 1024 +static const ulint BUF_LRU_DROP_SEARCH_SIZE = 1024; + +/** We scan these many blocks when looking for a clean page to evict +during LRU eviction. */ +static const ulint BUF_LRU_SEARCH_SCAN_THRESHOLD = 100; /** We scan these many blocks when looking for a clean page to evict during LRU eviction. */ @@ -87,7 +86,7 @@ during LRU eviction. */ /** If we switch on the InnoDB monitor because there are too few available frames in the buffer pool, we set this to TRUE */ -static ibool buf_lru_switched_on_innodb_mon = FALSE; +static bool buf_lru_switched_on_innodb_mon = false; /******************************************************************//** These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O @@ -103,11 +102,11 @@ uncompressed and compressed data), which must be clean. */ /** Number of intervals for which we keep the history of these stats. Each interval is 1 second, defined by the rate at which srv_error_monitor_thread() calls buf_LRU_stat_update(). */ -#define BUF_LRU_STAT_N_INTERVAL 50 +static const ulint BUF_LRU_STAT_N_INTERVAL = 50; /** Co-efficient with which we multiply I/O operations to equate them with page_zip_decompress() operations. */ -#define BUF_LRU_IO_TO_UNZIP_FACTOR 50 +static const ulint BUF_LRU_IO_TO_UNZIP_FACTOR = 50; /** Sampled values buf_LRU_stat_cur. Not protected by any mutex. Updated by buf_LRU_stat_update(). */ @@ -118,18 +117,18 @@ static ulint buf_LRU_stat_arr_ind; /** Current operation counters. Not protected by any mutex. Cleared by buf_LRU_stat_update(). */ -UNIV_INTERN buf_LRU_stat_t buf_LRU_stat_cur; +buf_LRU_stat_t buf_LRU_stat_cur; /** Running sum of past values of buf_LRU_stat_cur. Updated by buf_LRU_stat_update(). Not Protected by any mutex. 
*/ -UNIV_INTERN buf_LRU_stat_t buf_LRU_stat_sum; +buf_LRU_stat_t buf_LRU_stat_sum; /* @} */ /** @name Heuristics for detecting index scan @{ */ /** Move blocks to "new" LRU list only if the first access was at least this many milliseconds ago. Not protected by any mutex or latch. */ -UNIV_INTERN uint buf_LRU_old_threshold_ms; +uint buf_LRU_old_threshold_ms; /* @} */ /******************************************************************//** @@ -165,8 +164,7 @@ buf_LRU_block_free_hashed_page( be in a state where it can be freed */ /******************************************************************//** -Increases LRU size in bytes with zip_size for compressed page, -UNIV_PAGE_SIZE for uncompressed page in inline function */ +Increases LRU size in bytes with page size inline function */ static inline void incr_LRU_size_in_bytes( @@ -175,24 +173,21 @@ incr_LRU_size_in_bytes( buf_pool_t* buf_pool) /*!< in: buffer pool instance */ { ut_ad(buf_pool_mutex_own(buf_pool)); - ulint zip_size = page_zip_get_size(&bpage->zip); - buf_pool->stat.LRU_bytes += zip_size ? zip_size : UNIV_PAGE_SIZE; + + buf_pool->stat.LRU_bytes += bpage->size.physical(); + ut_ad(buf_pool->stat.LRU_bytes <= buf_pool->curr_pool_size); } /******************************************************************//** Determines if the unzip_LRU list should be used for evicting a victim instead of the general LRU list. -@return TRUE if should use unzip_LRU */ -UNIV_INTERN +@return TRUE if should use unzip_LRU */ ibool buf_LRU_evict_from_unzip_LRU( /*=========================*/ buf_pool_t* buf_pool) { - ulint io_avg; - ulint unzip_avg; - ut_ad(buf_pool_mutex_own(buf_pool)); /* If the unzip_LRU list is empty, we can only use the LRU. */ @@ -216,9 +211,10 @@ buf_LRU_evict_from_unzip_LRU( /* Calculate the average over past intervals, and add the values of the current interval. */ - io_avg = buf_LRU_stat_sum.io / BUF_LRU_STAT_N_INTERVAL + ulint io_avg = buf_LRU_stat_sum.io / BUF_LRU_STAT_N_INTERVAL + buf_LRU_stat_cur.io; - unzip_avg = buf_LRU_stat_sum.unzip / BUF_LRU_STAT_N_INTERVAL + + ulint unzip_avg = buf_LRU_stat_sum.unzip / BUF_LRU_STAT_N_INTERVAL + buf_LRU_stat_cur.unzip; /* Decide based on our formula. If the load is I/O bound @@ -228,27 +224,33 @@ buf_LRU_evict_from_unzip_LRU( return(unzip_avg <= io_avg * BUF_LRU_IO_TO_UNZIP_FACTOR); } -/******************************************************************//** -Attempts to drop page hash index on a batch of pages belonging to a -particular space id. */ +/** Attempts to drop page hash index on a batch of pages belonging to a +particular space id. +@param[in] space_id space id +@param[in] page_size page size +@param[in] arr array of page_no +@param[in] count number of entries in array */ static void buf_LRU_drop_page_hash_batch( -/*=========================*/ - ulint space_id, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - const ulint* arr, /*!< in: array of page_no */ - ulint count) /*!< in: number of entries in array */ + ulint space_id, + const page_size_t& page_size, + const ulint* arr, + ulint count) { - ulint i; - - ut_ad(arr != NULL); ut_ad(count <= BUF_LRU_DROP_SEARCH_SIZE); - for (i = 0; i < count; ++i) { - btr_search_drop_page_hash_when_freed(space_id, zip_size, - arr[i]); + for (ulint i = 0; i < count; ++i, ++arr) { + /* While our only caller + buf_LRU_drop_page_hash_for_tablespace() + is being executed for DROP TABLE or similar, + the table cannot be evicted from the buffer pool. 
+ Note: this should not be executed for DROP TABLESPACE, + because DROP TABLESPACE would be refused if tables existed + in the tablespace, and a previous DROP TABLE would have + already removed the AHI entries. */ + btr_search_drop_page_hash_when_freed( + page_id_t(space_id, *arr), page_size); } } @@ -264,38 +266,33 @@ buf_LRU_drop_page_hash_for_tablespace( buf_pool_t* buf_pool, /*!< in: buffer pool instance */ ulint id) /*!< in: space id */ { - buf_page_t* bpage; - ulint* page_arr; - ulint num_entries; - ulint zip_size; + bool found; + const page_size_t page_size(fil_space_get_page_size(id, &found)); - zip_size = fil_space_get_zip_size(id); - - if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) { + if (!found) { /* Somehow, the tablespace does not exist. Nothing to drop. */ ut_ad(0); return; } - page_arr = static_cast<ulint*>(ut_malloc( - sizeof(ulint) * BUF_LRU_DROP_SEARCH_SIZE)); + ulint* page_arr = static_cast<ulint*>(ut_malloc_nokey( + sizeof(ulint) * BUF_LRU_DROP_SEARCH_SIZE)); + + ulint num_entries = 0; buf_pool_mutex_enter(buf_pool); - num_entries = 0; scan_again: - bpage = UT_LIST_GET_LAST(buf_pool->LRU); - - while (bpage != NULL) { - buf_page_t* prev_bpage; - ibool is_fixed; + for (buf_page_t* bpage = UT_LIST_GET_LAST(buf_pool->LRU); + bpage != NULL; + /* No op */) { - prev_bpage = UT_LIST_GET_PREV(LRU, bpage); + buf_page_t* prev_bpage = UT_LIST_GET_PREV(LRU, bpage); ut_a(buf_page_in_file(bpage)); if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE - || bpage->space != id + || bpage->id.space() != id || bpage->io_fix != BUF_IO_NONE) { /* Compressed pages are never hashed. Skip blocks of other tablespaces. @@ -306,17 +303,25 @@ next_page: } mutex_enter(&((buf_block_t*) bpage)->mutex); - is_fixed = bpage->buf_fix_count > 0 - || !((buf_block_t*) bpage)->index; - mutex_exit(&((buf_block_t*) bpage)->mutex); - if (is_fixed) { - goto next_page; + { + bool skip = bpage->buf_fix_count > 0 + || !((buf_block_t*) bpage)->index; + + mutex_exit(&((buf_block_t*) bpage)->mutex); + + if (skip) { + /* Skip this block, because there are + no adaptive hash index entries + pointing to it, or because we cannot + drop them due to the buffer-fix. */ + goto next_page; + } } /* Store the page number so that we can drop the hash index in a batch later. */ - page_arr[num_entries] = bpage->offset; + page_arr[num_entries] = bpage->id.page_no(); ut_a(num_entries < BUF_LRU_DROP_SEARCH_SIZE); ++num_entries; @@ -329,7 +334,7 @@ next_page: buf_pool_mutex_exit(buf_pool); buf_LRU_drop_page_hash_batch( - id, zip_size, page_arr, num_entries); + id, page_size, page_arr, num_entries); num_entries = 0; @@ -351,8 +356,9 @@ next_page: /* If, however, bpage has been removed from LRU list to the free list then we should restart the scan. bpage->state is protected by buf_pool mutex. */ - if (bpage + if (bpage != NULL && buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) { + goto scan_again; } } @@ -360,7 +366,7 @@ next_page: buf_pool_mutex_exit(buf_pool); /* Drop any remaining batch of search hashed pages. 
*/ - buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries); + buf_LRU_drop_page_hash_batch(id, page_size, page_arr, num_entries); ut_free(page_arr); } @@ -377,7 +383,7 @@ buf_flush_yield( buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */ buf_page_t* bpage) /*!< in/out: current page */ { - ib_mutex_t* block_mutex; + BPageMutex* block_mutex; ut_ad(buf_pool_mutex_own(buf_pool)); ut_ad(buf_page_in_file(bpage)); @@ -385,6 +391,7 @@ buf_flush_yield( block_mutex = buf_page_get_mutex(bpage); mutex_enter(block_mutex); + /* "Fix" the block so that the position cannot be changed after we release the buffer pool and block mutexes. */ @@ -400,6 +407,7 @@ buf_flush_yield( buf_pool_mutex_enter(buf_pool); mutex_enter(block_mutex); + /* "Unfix" the block now that we have both the buffer pool and block mutex again. */ buf_page_unset_sticky(bpage); @@ -477,8 +485,14 @@ buf_flush_or_remove_page( yet; maybe the system is currently reading it in, or flushing the modifications to the file */ return(false); + } + BPageMutex* block_mutex; + bool processed = false; + + block_mutex = buf_page_get_mutex(bpage); + /* We have to release the flush_list_mutex to obey the latching order. We are however guaranteed that the page will stay in the flush_list and won't be relocated because @@ -487,9 +501,6 @@ buf_flush_or_remove_page( buf_flush_list_mutex_exit(buf_pool); - bool processed; - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); - mutex_enter(block_mutex); ut_ad(bpage->oldest_modification != 0); @@ -497,28 +508,30 @@ buf_flush_or_remove_page( if (!flush) { buf_flush_remove(bpage); - processed = true; - - } else if (buf_flush_ready_for_flush(bpage, BUF_FLUSH_SINGLE_PAGE) - && buf_flush_page( - buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE, false)) { - /* Wake possible simulated aio thread to actually - post the writes to the operating system */ - os_aio_simulated_wake_handler_threads(); + mutex_exit(block_mutex); - buf_pool_mutex_enter(buf_pool); + processed = true; - buf_flush_list_mutex_enter(buf_pool); + } else if (buf_flush_ready_for_flush(bpage, BUF_FLUSH_SINGLE_PAGE)) { - return(true); + /* The following call will release the buffer pool + and block mutex. */ + processed = buf_flush_page( + buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE, false); + if (processed) { + /* Wake possible simulated aio thread to actually + post the writes to the operating system */ + os_aio_simulated_wake_handler_threads(); + buf_pool_mutex_enter(buf_pool); + } else { + mutex_exit(block_mutex); + } } else { - processed = false; + mutex_exit(block_mutex); } - mutex_exit(block_mutex); - buf_flush_list_mutex_enter(buf_pool); ut_ad(!mutex_own(block_mutex)); @@ -542,6 +555,7 @@ buf_flush_or_remove_pages( buf_pool_t* buf_pool, /*!< buffer pool instance */ ulint id, /*!< in: target space id for which to remove or flush pages */ + FlushObserver* observer, /*!< in: flush observer */ bool flush, /*!< in: flush to disk if true but don't remove else remove without flushing to disk */ @@ -568,7 +582,10 @@ rescan: prev = UT_LIST_GET_PREV(list, bpage); - if (buf_page_get_space(bpage) != id) { + /* If flush observer is NULL, flush page for space id, + or flush page for flush observer. */ + if ((observer != NULL && observer != bpage->flush_observer) + || (observer == NULL && id != bpage->id.space())) { /* Skip this block, as it does not belong to the target space. */ @@ -626,6 +643,16 @@ rescan: /* The check for trx is interrupted is expensive, we want to check every N iterations. 
*/ if (!processed && trx && trx_is_interrupted(trx)) { + if (trx->flush_observer != NULL) { + if (flush) { + trx->flush_observer->interrupted(); + } else { + /* We should remove all pages with the + the flush observer. */ + continue; + } + } + buf_flush_list_mutex_exit(buf_pool); return(DB_INTERRUPTED); } @@ -647,6 +674,7 @@ buf_flush_dirty_pages( /*==================*/ buf_pool_t* buf_pool, /*!< buffer pool instance */ ulint id, /*!< in: space id */ + FlushObserver* observer, /*!< in: flush observer */ bool flush, /*!< in: flush to disk if true otherwise remove the pages without flushing */ const trx_t* trx) /*!< to check if the operation must @@ -657,7 +685,8 @@ buf_flush_dirty_pages( do { buf_pool_mutex_enter(buf_pool); - err = buf_flush_or_remove_pages(buf_pool, id, flush, trx); + err = buf_flush_or_remove_pages( + buf_pool, id, observer, flush, trx); buf_pool_mutex_exit(buf_pool); @@ -667,6 +696,13 @@ buf_flush_dirty_pages( os_thread_sleep(2000); } + if (err == DB_INTERRUPTED && observer != NULL) { + ut_a(flush); + + flush = false; + err = DB_FAIL; + } + /* DB_FAIL is a soft error, it means that the task wasn't completed, needs to be retried. */ @@ -675,7 +711,7 @@ buf_flush_dirty_pages( } while (err == DB_FAIL); ut_ad(err == DB_INTERRUPTED - || buf_pool_get_dirty_pages_count(buf_pool, id) == 0); + || buf_pool_get_dirty_pages_count(buf_pool, id, observer) == 0); } /******************************************************************//** @@ -702,18 +738,18 @@ scan_again: rw_lock_t* hash_lock; buf_page_t* prev_bpage; - ib_mutex_t* block_mutex = NULL; + BPageMutex* block_mutex; ut_a(buf_page_in_file(bpage)); ut_ad(bpage->in_LRU_list); prev_bpage = UT_LIST_GET_PREV(LRU, bpage); - /* bpage->space and bpage->io_fix are protected by + /* bpage->id.space() and bpage->io_fix are protected by buf_pool->mutex and the block_mutex. It is safe to check them while holding buf_pool->mutex only. */ - if (buf_page_get_space(bpage) != id) { + if (bpage->id.space() != id) { /* Skip this block, as it does not belong to the space that is being invalidated. */ goto next_page; @@ -725,14 +761,12 @@ scan_again: all_freed = FALSE; goto next_page; } else { - ulint fold = buf_page_address_fold( - bpage->space, bpage->offset); - - hash_lock = buf_page_hash_lock_get(buf_pool, fold); + hash_lock = buf_page_hash_lock_get(buf_pool, bpage->id); rw_lock_x_lock(hash_lock); block_mutex = buf_page_get_mutex(bpage); + mutex_enter(block_mutex); if (bpage->buf_fix_count > 0) { @@ -754,35 +788,30 @@ scan_again: ut_ad(mutex_own(block_mutex)); -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fprintf(stderr, - "Dropping space %lu page %lu\n", - (ulong) buf_page_get_space(bpage), - (ulong) buf_page_get_page_no(bpage)); - } -#endif + DBUG_PRINT("ib_buf", ("evict page %u:%u" + " state %u", + bpage->id.space(), + bpage->id.page_no(), + bpage->state)); + if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) { /* Do nothing, because the adaptive hash index covers uncompressed pages only. */ } else if (((buf_block_t*) bpage)->index) { - ulint page_no; - ulint zip_size; - buf_pool_mutex_exit(buf_pool); - zip_size = buf_page_get_zip_size(bpage); - page_no = buf_page_get_page_no(bpage); - rw_lock_x_unlock(hash_lock); mutex_exit(block_mutex); /* Note that the following call will acquire - and release block->lock X-latch. */ + and release block->lock X-latch. + Note that the table cannot be evicted during + the execution of ALTER TABLE...DISCARD TABLESPACE + because MySQL is keeping the table handle open. 
*/ btr_search_drop_page_hash_when_freed( - id, zip_size, page_no); + bpage->id, bpage->size); goto scan_again; } @@ -804,11 +833,9 @@ scan_again: ut_ad(!mutex_own(block_mutex)); -#ifdef UNIV_SYNC_DEBUG /* buf_LRU_block_remove_hashed() releases the hash_lock */ - ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)); - ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(!rw_lock_own(hash_lock, RW_LOCK_X)); + ut_ad(!rw_lock_own(hash_lock, RW_LOCK_S)); next_page: bpage = prev_bpage; @@ -839,22 +866,27 @@ buf_LRU_remove_pages( const trx_t* trx) /*!< to check if the operation must be interrupted */ { + FlushObserver* observer = (trx == NULL) ? NULL : trx->flush_observer; + switch (buf_remove) { case BUF_REMOVE_ALL_NO_WRITE: buf_LRU_remove_all_pages(buf_pool, id); break; case BUF_REMOVE_FLUSH_NO_WRITE: - ut_a(trx == 0); - buf_flush_dirty_pages(buf_pool, id, false, NULL); + /* Pass trx as NULL to avoid interruption check. */ + buf_flush_dirty_pages(buf_pool, id, observer, false, NULL); break; case BUF_REMOVE_FLUSH_WRITE: - ut_a(trx != 0); - buf_flush_dirty_pages(buf_pool, id, true, trx); - /* Ensure that all asynchronous IO is completed. */ - os_aio_wait_until_no_pending_writes(); - fil_flush(id); + buf_flush_dirty_pages(buf_pool, id, observer, true, trx); + + if (observer == NULL) { + /* Ensure that all asynchronous IO is completed. */ + os_aio_wait_until_no_pending_writes(); + fil_flush(id); + } + break; } } @@ -864,7 +896,6 @@ Flushes all dirty pages or removes all pages belonging to a given tablespace. A PROBLEM: if readahead is being started, what guarantees that it will not try to read in pages after this operation has completed? */ -UNIV_INTERN void buf_LRU_flush_or_remove_pages( /*==========================*/ @@ -909,13 +940,11 @@ buf_LRU_flush_or_remove_pages( #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG /********************************************************************//** Insert a compressed block into buf_pool->zip_clean in the LRU order. */ -UNIV_INTERN void buf_LRU_insert_zip_clean( /*=====================*/ buf_page_t* bpage) /*!< in: pointer to the block in question */ { - buf_page_t* b; buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); ut_ad(buf_pool_mutex_own(buf_pool)); @@ -923,20 +952,21 @@ buf_LRU_insert_zip_clean( /* Find the first successor of bpage in the LRU list that is in the zip_clean list. */ - b = bpage; + buf_page_t* b = bpage; + do { b = UT_LIST_GET_NEXT(LRU, b); } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE); /* Insert bpage before b, i.e., after the predecessor of b. */ - if (b) { + if (b != NULL) { b = UT_LIST_GET_PREV(list, b); } - if (b) { - UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage); + if (b != NULL) { + UT_LIST_INSERT_AFTER(buf_pool->zip_clean, b, bpage); } else { - UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage); + UT_LIST_ADD_FIRST(buf_pool->zip_clean, bpage); } } #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ @@ -944,34 +974,34 @@ buf_LRU_insert_zip_clean( /******************************************************************//** Try to free an uncompressed page of a compressed block from the unzip LRU list. The compressed page is preserved, and it need not be clean. 
-@return TRUE if freed */ -UNIV_INLINE -ibool +@return true if freed */ +static +bool buf_LRU_free_from_unzip_LRU_list( /*=============================*/ buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - ibool scan_all) /*!< in: scan whole LRU list - if TRUE, otherwise scan only + bool scan_all) /*!< in: scan whole LRU list + if true, otherwise scan only srv_LRU_scan_depth / 2 blocks. */ { - buf_block_t* block; - ibool freed; - ulint scanned; - ut_ad(buf_pool_mutex_own(buf_pool)); if (!buf_LRU_evict_from_unzip_LRU(buf_pool)) { - return(FALSE); + return(false); } - for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU), - scanned = 0, freed = FALSE; - block != NULL && !freed + ulint scanned = 0; + bool freed = false; + + for (buf_block_t* block = UT_LIST_GET_LAST(buf_pool->unzip_LRU); + block != NULL + && !freed && (scan_all || scanned < srv_LRU_scan_depth); ++scanned) { - buf_block_t* prev_block = UT_LIST_GET_PREV(unzip_LRU, - block); + buf_block_t* prev_block; + + prev_block = UT_LIST_GET_PREV(unzip_LRU, block); ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); ut_ad(block->in_unzip_LRU_list); @@ -989,43 +1019,44 @@ buf_LRU_free_from_unzip_LRU_list( MONITOR_LRU_UNZIP_SEARCH_SCANNED_PER_CALL, scanned); } + return(freed); } /******************************************************************//** Try to free a clean page from the common LRU list. -@return TRUE if freed */ -UNIV_INLINE -ibool +@return true if freed */ +static +bool buf_LRU_free_from_common_LRU_list( /*==============================*/ buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - ibool scan_all) /*!< in: scan whole LRU list - if TRUE, otherwise scan only - srv_LRU_scan_depth / 2 blocks. */ + bool scan_all) /*!< in: scan whole LRU list + if true, otherwise scan only + up to BUF_LRU_SEARCH_SCAN_THRESHOLD */ { - buf_page_t* bpage; - ibool freed; - ulint scanned; - ut_ad(buf_pool_mutex_own(buf_pool)); - for (bpage = buf_pool->lru_scan_itr.start(), - scanned = 0, freed = false; - bpage != NULL && !freed + ulint scanned = 0; + bool freed = false; + + for (buf_page_t* bpage = buf_pool->lru_scan_itr.start(); + bpage != NULL + && !freed && (scan_all || scanned < BUF_LRU_SEARCH_SCAN_THRESHOLD); ++scanned, bpage = buf_pool->lru_scan_itr.get()) { - buf_page_t* prev = UT_LIST_GET_PREV(LRU, bpage); + buf_page_t* prev = UT_LIST_GET_PREV(LRU, bpage); + BPageMutex* mutex = buf_page_get_mutex(bpage); + buf_pool->lru_scan_itr.set(prev); - ib_mutex_t* mutex = buf_page_get_mutex(bpage); mutex_enter(mutex); ut_ad(buf_page_in_file(bpage)); ut_ad(bpage->in_LRU_list); - unsigned accessed = buf_page_is_accessed(bpage); + unsigned accessed = buf_page_is_accessed(bpage); if (buf_flush_ready_for_replace(bpage)) { mutex_exit(mutex); @@ -1058,47 +1089,45 @@ buf_LRU_free_from_common_LRU_list( /******************************************************************//** Try to free a replaceable block. -@return TRUE if found and freed */ -UNIV_INTERN -ibool +@return true if found and freed */ +bool buf_LRU_scan_and_free_block( /*========================*/ buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - ibool scan_all) /*!< in: scan whole LRU list - if TRUE, otherwise scan only - 'old' blocks. */ + bool scan_all) /*!< in: scan whole LRU list + if true, otherwise scan only + BUF_LRU_SEARCH_SCAN_THRESHOLD + blocks. 
*/ { ut_ad(buf_pool_mutex_own(buf_pool)); return(buf_LRU_free_from_unzip_LRU_list(buf_pool, scan_all) - || buf_LRU_free_from_common_LRU_list( - buf_pool, scan_all)); + || buf_LRU_free_from_common_LRU_list(buf_pool, scan_all)); } /******************************************************************//** Returns TRUE if less than 25 % of the buffer pool in any instance is available. This can be used in heuristics to prevent huge transactions eating up the whole buffer pool for their locks. -@return TRUE if less than 25 % of buffer pool left */ -UNIV_INTERN +@return TRUE if less than 25 % of buffer pool left */ ibool buf_LRU_buf_pool_running_out(void) /*==============================*/ { - ulint i; ibool ret = FALSE; - for (i = 0; i < srv_buf_pool_instances && !ret; i++) { + for (ulint i = 0; i < srv_buf_pool_instances && !ret; i++) { buf_pool_t* buf_pool; buf_pool = buf_pool_from_array(i); buf_pool_mutex_enter(buf_pool); - if (!recv_recovery_on + if (!recv_recovery_is_on() && UT_LIST_GET_LEN(buf_pool->free) + UT_LIST_GET_LEN(buf_pool->LRU) - < buf_pool->curr_size / 4) { + < ut_min(buf_pool->curr_size, + buf_pool->old_size) / 4) { ret = TRUE; } @@ -1112,8 +1141,7 @@ buf_LRU_buf_pool_running_out(void) /******************************************************************//** Returns a free block from the buf_pool. The block is taken off the free list. If it is empty, returns NULL. -@return a free control block, or NULL if the buf_block->free list is empty */ -UNIV_INTERN +@return a free control block, or NULL if the buf_block->free list is empty */ buf_block_t* buf_LRU_get_free_only( /*==================*/ @@ -1123,25 +1151,42 @@ buf_LRU_get_free_only( ut_ad(buf_pool_mutex_own(buf_pool)); - block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free); + block = reinterpret_cast<buf_block_t*>( + UT_LIST_GET_FIRST(buf_pool->free)); - if (block) { + while (block != NULL) { ut_ad(block->page.in_free_list); ut_d(block->page.in_free_list = FALSE); ut_ad(!block->page.in_flush_list); ut_ad(!block->page.in_LRU_list); ut_a(!buf_page_in_file(&block->page)); - UT_LIST_REMOVE(list, buf_pool->free, (&block->page)); + UT_LIST_REMOVE(buf_pool->free, &block->page); - mutex_enter(&block->mutex); + if (buf_pool->curr_size >= buf_pool->old_size + || UT_LIST_GET_LEN(buf_pool->withdraw) + >= buf_pool->withdraw_target + || !buf_block_will_withdrawn(buf_pool, block)) { + /* found valid free block */ + buf_page_mutex_enter(block); - buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE); - UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE); + buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE); + UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE); - ut_ad(buf_pool_from_block(block) == buf_pool); + ut_ad(buf_pool_from_block(block) == buf_pool); + + buf_page_mutex_exit(block); + break; + } + + /* This should be withdrawn */ + UT_LIST_ADD_LAST( + buf_pool->withdraw, + &block->page); + ut_d(block->in_withdraw_list = TRUE); - mutex_exit(&block->mutex); + block = reinterpret_cast<buf_block_t*>( + UT_LIST_GET_FIRST(buf_pool->free)); } return(block); @@ -1160,28 +1205,23 @@ buf_LRU_check_size_of_non_data_objects( { ut_ad(buf_pool_mutex_own(buf_pool)); - if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free) + if (!recv_recovery_is_on() + && buf_pool->curr_size == buf_pool->old_size + && UT_LIST_GET_LEN(buf_pool->free) + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: ERROR: over 95 percent of the buffer pool" - " is occupied by\n" - "InnoDB: lock heaps or the adaptive hash 
index!" - " Check that your\n" - "InnoDB: transactions do not set too many row locks.\n" - "InnoDB: Your buffer pool size is %lu MB." - " Maybe you should make\n" - "InnoDB: the buffer pool bigger?\n" - "InnoDB: We intentionally generate a seg fault" - " to print a stack trace\n" - "InnoDB: on Linux!\n", - (ulong) (buf_pool->curr_size - / (1024 * 1024 / UNIV_PAGE_SIZE))); - - ut_error; - - } else if (!recv_recovery_on + ib::fatal() << "Over 95 percent of the buffer pool is" + " occupied by lock heaps or the adaptive hash index!" + " Check that your transactions do not set too many" + " row locks. Your buffer pool size is " + << (buf_pool->curr_size + / (1024 * 1024 / UNIV_PAGE_SIZE)) << " MB." + " Maybe you should make the buffer pool bigger?" + " We intentionally generate a seg fault to print" + " a stack trace on Linux!"; + + } else if (!recv_recovery_is_on() + && buf_pool->curr_size == buf_pool->old_size && (UT_LIST_GET_LEN(buf_pool->free) + UT_LIST_GET_LEN(buf_pool->LRU)) < buf_pool->curr_size / 3) { @@ -1192,27 +1232,23 @@ buf_LRU_check_size_of_non_data_objects( heaps or the adaptive hash index. This may be a memory leak! */ - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: WARNING: over 67 percent of" - " the buffer pool is occupied by\n" - "InnoDB: lock heaps or the adaptive" - " hash index! Check that your\n" - "InnoDB: transactions do not set too many" - " row locks.\n" - "InnoDB: Your buffer pool size is %lu MB." - " Maybe you should make\n" - "InnoDB: the buffer pool bigger?\n" - "InnoDB: Starting the InnoDB Monitor to print" - " diagnostics, including\n" - "InnoDB: lock heap and hash index sizes.\n", - (ulong) (buf_pool->curr_size - / (1024 * 1024 / UNIV_PAGE_SIZE))); - - buf_lru_switched_on_innodb_mon = TRUE; + ib::warn() << "Over 67 percent of the buffer pool is" + " occupied by lock heaps or the adaptive hash" + " index! Check that your transactions do not" + " set too many row locks. Your buffer pool" + " size is " + << (buf_pool->curr_size + / (1024 * 1024 / UNIV_PAGE_SIZE)) + << " MB. Maybe you should make the buffer pool" + " bigger?. Starting the InnoDB Monitor to print" + " diagnostics, including lock heap and hash" + " index sizes."; + + buf_lru_switched_on_innodb_mon = true; srv_print_innodb_monitor = TRUE; os_event_set(srv_monitor_event); } + } else if (buf_lru_switched_on_innodb_mon) { /* Switch off the InnoDB Monitor; this is a simple way @@ -1220,7 +1256,7 @@ buf_LRU_check_size_of_non_data_objects( but may also surprise users if the user also switched on the monitor! */ - buf_lru_switched_on_innodb_mon = FALSE; + buf_lru_switched_on_innodb_mon = false; srv_print_innodb_monitor = FALSE; } } @@ -1248,19 +1284,18 @@ we put it to free list to be used. 
* scan LRU list even if buf_pool->try_LRU_scan is not set * iteration > 1: * same as iteration 1 but sleep 10ms -@return the free control block, in state BUF_BLOCK_READY_FOR_USE */ -UNIV_INTERN +@return the free control block, in state BUF_BLOCK_READY_FOR_USE */ buf_block_t* buf_LRU_get_free_block( /*===================*/ buf_pool_t* buf_pool) /*!< in/out: buffer pool instance */ { buf_block_t* block = NULL; - ibool freed = FALSE; + bool freed = false; ulint n_iterations = 0; ulint flush_failures = 0; - ibool mon_value_was = FALSE; - ibool started_monitor = FALSE; + bool mon_value_was = false; + bool started_monitor = false; MONITOR_INC(MONITOR_LRU_GET_FREE_SEARCH); loop: @@ -1271,7 +1306,7 @@ loop: /* If there is a block in the free list, take it */ block = buf_LRU_get_free_only(buf_pool); - if (block) { + if (block != NULL) { buf_pool_mutex_exit(buf_pool); ut_ad(buf_pool_from_block(block) == buf_pool); @@ -1282,18 +1317,21 @@ loop: static_cast<my_bool>(mon_value_was); } + block->skip_flush_check = false; + block->page.flush_observer = NULL; return(block); } - freed = FALSE; + MONITOR_INC( MONITOR_LRU_GET_FREE_LOOPS ); + freed = false; if (buf_pool->try_LRU_scan || n_iterations > 0) { /* If no block was in the free list, search from the end of the LRU list and try to free a block there. If we are doing for the first time we'll scan only tail of the LRU list otherwise we scan the whole LRU list. */ - freed = buf_LRU_scan_and_free_block(buf_pool, - n_iterations > 0); + freed = buf_LRU_scan_and_free_block( + buf_pool, n_iterations > 0); if (!freed && n_iterations == 0) { /* Tell other threads that there is no point @@ -1312,42 +1350,33 @@ loop: if (freed) { goto loop; - } - if (n_iterations > 20) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: difficult to find free blocks in\n" - "InnoDB: the buffer pool (%lu search iterations)!\n" - "InnoDB: %lu failed attempts to flush a page!" - " Consider\n" - "InnoDB: increasing the buffer pool size.\n" - "InnoDB: It is also possible that" - " in your Unix version\n" - "InnoDB: fsync is very slow, or" - " completely frozen inside\n" - "InnoDB: the OS kernel. Then upgrading to" - " a newer version\n" - "InnoDB: of your operating system may help." - " Look at the\n" - "InnoDB: number of fsyncs in diagnostic info below.\n" - "InnoDB: Pending flushes (fsync) log: %lu;" - " buffer pool: %lu\n" - "InnoDB: %lu OS file reads, %lu OS file writes," - " %lu OS fsyncs\n" - "InnoDB: Starting InnoDB Monitor to print further\n" - "InnoDB: diagnostics to the standard output.\n", - (ulong) n_iterations, - (ulong) flush_failures, - (ulong) fil_n_pending_log_flushes, - (ulong) fil_n_pending_tablespace_flushes, - (ulong) os_n_file_reads, (ulong) os_n_file_writes, - (ulong) os_n_fsyncs); + if (n_iterations > 20 + && srv_buf_pool_old_size == srv_buf_pool_size) { + + ib::warn() << "Difficult to find free blocks in the buffer pool" + " (" << n_iterations << " search iterations)! " + << flush_failures << " failed attempts to" + " flush a page! Consider increasing the buffer pool" + " size. It is also possible that in your Unix version" + " fsync is very slow, or completely frozen inside" + " the OS kernel. Then upgrading to a newer version" + " of your operating system may help. Look at the" + " number of fsyncs in diagnostic info below." + " Pending flushes (fsync) log: " + << fil_n_pending_log_flushes + << "; buffer pool: " + << fil_n_pending_tablespace_flushes + << ". 
" << os_n_file_reads << " OS file reads, " + << os_n_file_writes << " OS file writes, " + << os_n_fsyncs + << " OS fsyncs. Starting InnoDB Monitor to print" + " further diagnostics to the standard output."; mon_value_was = srv_print_innodb_monitor; - started_monitor = TRUE; - srv_print_innodb_monitor = TRUE; + started_monitor = true; + srv_print_innodb_monitor = true; os_event_set(srv_monitor_event); } @@ -1355,7 +1384,13 @@ loop: find a free block then we should sleep here to let the page_cleaner do an LRU batch for us. */ + if (!srv_read_only_mode) { + os_event_set(buf_flush_event); + } + if (n_iterations > 1) { + + MONITOR_INC( MONITOR_LRU_GET_FREE_WAITS ); os_thread_sleep(10000); } @@ -1363,11 +1398,13 @@ loop: This call will flush one page from the LRU and put it on the free list. That means that the free block is up for grabs for all user threads. + TODO: A more elegant way would have been to return the freed up block to the caller here but the code that deals with removing the block from page_hash and LRU_list is fairly involved (particularly in case of compressed pages). We can do that in a separate patch sometime in future. */ + if (!buf_flush_single_page_from_LRU(buf_pool)) { MONITOR_INC(MONITOR_LRU_SINGLE_FLUSH_FAILURE_COUNT); ++flush_failures; @@ -1457,8 +1494,6 @@ buf_LRU_old_init( /*=============*/ buf_pool_t* buf_pool) { - buf_page_t* bpage; - ut_ad(buf_pool_mutex_own(buf_pool)); ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN); @@ -1466,10 +1501,13 @@ buf_LRU_old_init( the adjust function to move the LRU_old pointer to the right position */ - for (bpage = UT_LIST_GET_LAST(buf_pool->LRU); bpage != NULL; + for (buf_page_t* bpage = UT_LIST_GET_LAST(buf_pool->LRU); + bpage != NULL; bpage = UT_LIST_GET_PREV(LRU, bpage)) { + ut_ad(bpage->in_LRU_list); ut_ad(buf_page_in_file(bpage)); + /* This loop temporarily violates the assertions of buf_page_set_old(). */ bpage->old = TRUE; @@ -1491,24 +1529,21 @@ buf_unzip_LRU_remove_block_if_needed( { buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool); - ut_ad(bpage); ut_ad(buf_page_in_file(bpage)); ut_ad(buf_pool_mutex_own(buf_pool)); if (buf_page_belongs_to_unzip_LRU(bpage)) { - buf_block_t* block = (buf_block_t*) bpage; + buf_block_t* block = reinterpret_cast<buf_block_t*>(bpage); ut_ad(block->in_unzip_LRU_list); ut_d(block->in_unzip_LRU_list = FALSE); - UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block); + UT_LIST_REMOVE(buf_pool->unzip_LRU, block); } } /******************************************************************//** Adjust LRU hazard pointers if needed. */ - void buf_LRU_adjust_hp( /*==============*/ @@ -1529,10 +1564,7 @@ buf_LRU_remove_block( buf_page_t* bpage) /*!< in: control block */ { buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ulint zip_size; - ut_ad(buf_pool); - ut_ad(bpage); ut_ad(buf_pool_mutex_own(buf_pool)); ut_a(buf_page_in_file(bpage)); @@ -1546,7 +1578,7 @@ buf_LRU_remove_block( /* If the LRU_old pointer is defined and points to just this block, move it backward one step */ - if (UNIV_UNLIKELY(bpage == buf_pool->LRU_old)) { + if (bpage == buf_pool->LRU_old) { /* Below: the previous block is guaranteed to exist, because the LRU_old pointer is only allowed to differ @@ -1566,11 +1598,10 @@ buf_LRU_remove_block( } /* Remove the block from the LRU list */ - UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage); + UT_LIST_REMOVE(buf_pool->LRU, bpage); ut_d(bpage->in_LRU_list = FALSE); - zip_size = page_zip_get_size(&bpage->zip); - buf_pool->stat.LRU_bytes -= zip_size ? 
zip_size : UNIV_PAGE_SIZE; + buf_pool->stat.LRU_bytes -= bpage->size.physical(); buf_unzip_LRU_remove_block_if_needed(bpage); @@ -1578,8 +1609,10 @@ buf_LRU_remove_block( clear the "old" flags and return */ if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) { - for (bpage = UT_LIST_GET_FIRST(buf_pool->LRU); bpage != NULL; + for (buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool->LRU); + bpage != NULL; bpage = UT_LIST_GET_NEXT(LRU, bpage)) { + /* This loop temporarily violates the assertions of buf_page_set_old(). */ bpage->old = FALSE; @@ -1605,7 +1638,6 @@ buf_LRU_remove_block( /******************************************************************//** Adds a block to the LRU list of decompressed zip pages. */ -UNIV_INTERN void buf_unzip_LRU_add_block( /*====================*/ @@ -1615,8 +1647,6 @@ buf_unzip_LRU_add_block( { buf_pool_t* buf_pool = buf_pool_from_block(block); - ut_ad(buf_pool); - ut_ad(block); ut_ad(buf_pool_mutex_own(buf_pool)); ut_a(buf_page_belongs_to_unzip_LRU(&block->page)); @@ -1625,18 +1655,17 @@ buf_unzip_LRU_add_block( ut_d(block->in_unzip_LRU_list = TRUE); if (old) { - UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block); + UT_LIST_ADD_LAST(buf_pool->unzip_LRU, block); } else { - UT_LIST_ADD_FIRST(unzip_LRU, buf_pool->unzip_LRU, block); + UT_LIST_ADD_FIRST(buf_pool->unzip_LRU, block); } } /******************************************************************//** -Adds a block to the LRU list end. Please make sure that the zip_size is -already set into the page zip when invoking the function, so that we -can get correct zip_size from the buffer page when adding a block -into LRU */ -UNIV_INLINE +Adds a block to the LRU list end. Please make sure that the page_size is +already set when invoking the function, so that we can get correct +page_size from the buffer page when adding a block into LRU */ +static void buf_LRU_add_block_to_end_low( /*=========================*/ @@ -1644,14 +1673,12 @@ buf_LRU_add_block_to_end_low( { buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool); - ut_ad(bpage); ut_ad(buf_pool_mutex_own(buf_pool)); ut_a(buf_page_in_file(bpage)); ut_ad(!bpage->in_LRU_list); - UT_LIST_ADD_LAST(LRU, buf_pool->LRU, bpage); + UT_LIST_ADD_LAST(buf_pool->LRU, bpage); ut_d(bpage->in_LRU_list = TRUE); incr_LRU_size_in_bytes(bpage, buf_pool); @@ -1684,10 +1711,9 @@ buf_LRU_add_block_to_end_low( } /******************************************************************//** -Adds a block to the LRU list. Please make sure that the zip_size is -already set into the page zip when invoking the function, so that we -can get correct zip_size from the buffer page when adding a block -into LRU */ +Adds a block to the LRU list. 
Please make sure that the page_size is +already set when invoking the function, so that we can get correct +page_size from the buffer page when adding a block into LRU */ UNIV_INLINE void buf_LRU_add_block_low( @@ -1707,7 +1733,7 @@ buf_LRU_add_block_low( if (!old || (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN)) { - UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, bpage); + UT_LIST_ADD_FIRST(buf_pool->LRU, bpage); bpage->freed_page_clock = buf_pool->freed_page_clock; } else { @@ -1720,8 +1746,9 @@ buf_LRU_add_block_low( ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old) || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old); #endif /* UNIV_LRU_DEBUG */ - UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, buf_pool->LRU_old, - bpage); + UT_LIST_INSERT_AFTER(buf_pool->LRU, buf_pool->LRU_old, + bpage); + buf_pool->LRU_old_len++; } @@ -1756,11 +1783,9 @@ buf_LRU_add_block_low( } /******************************************************************//** -Adds a block to the LRU list. Please make sure that the zip_size is -already set into the page zip when invoking the function, so that we -can get correct zip_size from the buffer page when adding a block -into LRU */ -UNIV_INTERN +Adds a block to the LRU list. Please make sure that the page_size is +already set when invoking the function, so that we can get correct +page_size from the buffer page when adding a block into LRU */ void buf_LRU_add_block( /*==============*/ @@ -1776,7 +1801,6 @@ buf_LRU_add_block( /******************************************************************//** Moves a block to the start of the LRU list. */ -UNIV_INTERN void buf_LRU_make_block_young( /*=====================*/ @@ -1796,7 +1820,6 @@ buf_LRU_make_block_young( /******************************************************************//** Moves a block to the end of the LRU list. */ -UNIV_INTERN void buf_LRU_make_block_old( /*===================*/ @@ -1817,7 +1840,6 @@ accessible via bpage. The caller must hold buf_pool->mutex and must not hold any buf_page_get_mutex() when calling this function. @return true if freed, false otherwise. */ -UNIV_INTERN bool buf_LRU_free_page( /*===============*/ @@ -1827,11 +1849,10 @@ buf_LRU_free_page( { buf_page_t* b = NULL; buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - const ulint fold = buf_page_address_fold(bpage->space, - bpage->offset); - rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold); - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); + rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, bpage->id); + + BPageMutex* block_mutex = buf_page_get_mutex(bpage); ut_ad(buf_pool_mutex_own(buf_pool)); ut_ad(buf_page_in_file(bpage)); @@ -1842,12 +1863,12 @@ buf_LRU_free_page( if (!buf_page_can_relocate(bpage)) { - /* Do not free buffer fixed or I/O-fixed blocks. */ + /* Do not free buffer fixed and I/O-fixed blocks. 
*/ goto func_exit; } #ifdef UNIV_IBUF_COUNT_DEBUG - ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0); + ut_a(ibuf_count_get(bpage->id) == 0); #endif /* UNIV_IBUF_COUNT_DEBUG */ if (zip || !bpage->zip.data) { @@ -1878,28 +1899,19 @@ func_exit: ut_ad(bpage->in_LRU_list); ut_ad(!bpage->in_flush_list == !bpage->oldest_modification); -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fprintf(stderr, "Putting space %lu page %lu to free list\n", - (ulong) buf_page_get_space(bpage), - (ulong) buf_page_get_page_no(bpage)); - } -#endif /* UNIV_DEBUG */ + DBUG_PRINT("ib_buf", ("free page %u:%u", + bpage->id.space(), bpage->id.page_no())); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(rw_lock_own(hash_lock, RW_LOCK_X)); ut_ad(buf_page_can_relocate(bpage)); if (!buf_LRU_block_remove_hashed(bpage, zip)) { return(true); } -#ifdef UNIV_SYNC_DEBUG /* buf_LRU_block_remove_hashed() releases the hash_lock */ - ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX) - && !rw_lock_own(hash_lock, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(!rw_lock_own(hash_lock, RW_LOCK_X) + && !rw_lock_own(hash_lock, RW_LOCK_S)); /* We have just freed a BUF_BLOCK_FILE_PAGE. If b != NULL then it was a compressed page with an uncompressed frame and @@ -1908,20 +1920,22 @@ func_exit: into the LRU and page_hash (and possibly flush_list). if b == NULL then it was a regular page that has been freed */ - if (b) { + if (b != NULL) { buf_page_t* prev_b = UT_LIST_GET_PREV(LRU, b); rw_lock_x_lock(hash_lock); + mutex_enter(block_mutex); - ut_a(!buf_page_hash_get_low( - buf_pool, b->space, b->offset, fold)); + ut_a(!buf_page_hash_get_low(buf_pool, b->id)); b->state = b->oldest_modification ? BUF_BLOCK_ZIP_DIRTY : BUF_BLOCK_ZIP_PAGE; - UNIV_MEM_DESC(b->zip.data, - page_zip_get_size(&b->zip)); + + ut_ad(b->size.is_compressed()); + + UNIV_MEM_DESC(b->zip.data, b->size.physical()); /* The fields in_page_hash and in_LRU_list of the to-be-freed block descriptor should have @@ -1930,6 +1944,7 @@ func_exit: invokes buf_LRU_remove_block(). */ ut_ad(!bpage->in_page_hash); ut_ad(!bpage->in_LRU_list); + /* bpage->state was BUF_BLOCK_FILE_PAGE because b != NULL. The type cast below is thus valid. */ ut_ad(!((buf_block_t*) bpage)->in_unzip_LRU_list); @@ -1940,25 +1955,24 @@ func_exit: ut_ad(b->in_page_hash); ut_ad(b->in_LRU_list); - HASH_INSERT(buf_page_t, hash, - buf_pool->page_hash, fold, b); + HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, + b->id.fold(), b); /* Insert b where bpage was in the LRU list. */ - if (UNIV_LIKELY(prev_b != NULL)) { + if (prev_b != NULL) { ulint lru_len; ut_ad(prev_b->in_LRU_list); ut_ad(buf_page_in_file(prev_b)); - UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, - prev_b, b); + + UT_LIST_INSERT_AFTER(buf_pool->LRU, prev_b, b); incr_LRU_size_in_bytes(b, buf_pool); if (buf_page_is_old(b)) { buf_pool->LRU_old_len++; - if (UNIV_UNLIKELY - (buf_pool->LRU_old - == UT_LIST_GET_NEXT(LRU, b))) { + if (buf_pool->LRU_old + == UT_LIST_GET_NEXT(LRU, b)) { buf_pool->LRU_old = b; } @@ -1997,33 +2011,27 @@ func_exit: } bpage->zip.data = NULL; + page_zip_set_size(&bpage->zip, 0); + + bpage->size.copy_from(page_size_t(bpage->size.logical(), + bpage->size.logical(), + false)); + mutex_exit(block_mutex); /* Prevent buf_page_get_gen() from decompressing the block while we release buf_pool->mutex and block_mutex. 
*/ block_mutex = buf_page_get_mutex(b); + mutex_enter(block_mutex); + buf_page_set_sticky(b); + mutex_exit(block_mutex); rw_lock_x_unlock(hash_lock); - - } else { - - /* There can be multiple threads doing an LRU scan to - free a block. The page_cleaner thread can be doing an - LRU batch whereas user threads can potentially be doing - multiple single page flushes. As we release - buf_pool->mutex below we need to make sure that no one - else considers this block as a victim for page - replacement. This block is already out of page_hash - and we are about to remove it from the LRU list and put - it on the free list. */ - mutex_enter(block_mutex); - buf_page_set_sticky(bpage); - mutex_exit(block_mutex); } buf_pool_mutex_exit(buf_pool); @@ -2040,8 +2048,8 @@ func_exit: UNIV_MEM_INVALID(((buf_block_t*) bpage)->frame, UNIV_PAGE_SIZE); - if (b) { - ib_uint32_t checksum; + if (b != NULL) { + /* Compute and stamp the compressed page checksum while not holding any mutex. The block is already half-freed @@ -2049,12 +2057,13 @@ func_exit: buf_pool->page_hash, thus inaccessible by any other thread. */ - checksum = static_cast<ib_uint32_t>( - page_zip_calc_checksum( - b->zip.data, - page_zip_get_size(&b->zip), - static_cast<srv_checksum_algorithm_t>( - srv_checksum_algorithm))); + ut_ad(b->size.is_compressed()); + + const uint32_t checksum = page_zip_calc_checksum( + b->zip.data, + b->size.physical(), + static_cast<srv_checksum_algorithm_t>( + srv_checksum_algorithm)); mach_write_to_4(b->zip.data + FIL_PAGE_SPACE_OR_CHKSUM, checksum); @@ -2062,17 +2071,21 @@ func_exit: buf_pool_mutex_enter(buf_pool); - mutex_enter(block_mutex); - buf_page_unset_sticky(b != NULL ? b : bpage); - mutex_exit(block_mutex); + if (b != NULL) { + mutex_enter(block_mutex); + + buf_page_unset_sticky(b); + + mutex_exit(block_mutex); + } buf_LRU_block_free_hashed_page((buf_block_t*) bpage); + return(true); } /******************************************************************//** Puts a block back to the free list. 
*/ -UNIV_INTERN void buf_LRU_block_free_non_file_page( /*=============================*/ @@ -2081,9 +2094,8 @@ buf_LRU_block_free_non_file_page( void* data; buf_pool_t* buf_pool = buf_pool_from_block(block); - ut_ad(block); ut_ad(buf_pool_mutex_own(buf_pool)); - ut_ad(mutex_own(&block->mutex)); + ut_ad(buf_page_mutex_own(block)); switch (buf_block_get_state(block)) { case BUF_BLOCK_MEMORY: @@ -2112,24 +2124,41 @@ buf_LRU_block_free_non_file_page( /* Wipe page_no and space_id */ memset(block->frame + FIL_PAGE_OFFSET, 0xfe, 4); memset(block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xfe, 4); -#endif +#endif /* UNIV_DEBUG */ data = block->page.zip.data; - if (data) { + if (data != NULL) { block->page.zip.data = NULL; - mutex_exit(&block->mutex); + buf_page_mutex_exit(block); buf_pool_mutex_exit_forbid(buf_pool); - buf_buddy_free( - buf_pool, data, page_zip_get_size(&block->page.zip)); + ut_ad(block->page.size.is_compressed()); + + buf_buddy_free(buf_pool, data, block->page.size.physical()); buf_pool_mutex_exit_allow(buf_pool); - mutex_enter(&block->mutex); + buf_page_mutex_enter(block); + page_zip_set_size(&block->page.zip, 0); + + block->page.size.copy_from( + page_size_t(block->page.size.logical(), + block->page.size.logical(), + false)); } - UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page)); - ut_d(block->page.in_free_list = TRUE); + if (buf_pool->curr_size < buf_pool->old_size + && UT_LIST_GET_LEN(buf_pool->withdraw) < buf_pool->withdraw_target + && buf_block_will_withdrawn(buf_pool, block)) { + /* This should be withdrawn */ + UT_LIST_ADD_LAST( + buf_pool->withdraw, + &block->page); + ut_d(block->in_withdraw_list = TRUE); + } else { + UT_LIST_ADD_FIRST(buf_pool->free, &block->page); + ut_d(block->page.in_free_list = TRUE); + } UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE); } @@ -2158,20 +2187,16 @@ buf_LRU_block_remove_hashed( bool zip) /*!< in: true if should remove also the compressed page of an uncompressed page */ { - ulint fold; const buf_page_t* hashed_bpage; buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); rw_lock_t* hash_lock; - ut_ad(bpage); ut_ad(buf_pool_mutex_own(buf_pool)); ut_ad(mutex_own(buf_page_get_mutex(bpage))); - fold = buf_page_address_fold(bpage->space, bpage->offset); - hash_lock = buf_page_hash_lock_get(buf_pool, fold); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + hash_lock = buf_page_hash_lock_get(buf_pool, bpage->id); + + ut_ad(rw_lock_own(hash_lock, RW_LOCK_X)); ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE); ut_a(bpage->buf_fix_count == 0); @@ -2188,13 +2213,11 @@ buf_LRU_block_remove_hashed( buf_block_modify_clock_inc((buf_block_t*) bpage); if (bpage->zip.data) { const page_t* page = ((buf_block_t*) bpage)->frame; - const ulint zip_size - = page_zip_get_size(&bpage->zip); ut_a(!zip || bpage->oldest_modification == 0); + ut_ad(bpage->size.is_compressed()); - switch (UNIV_EXPECT(fil_page_get_type(page), - FIL_PAGE_INDEX)) { + switch (fil_page_get_type(page)) { case FIL_PAGE_TYPE_ALLOCATED: case FIL_PAGE_INODE: case FIL_PAGE_IBUF_BITMAP: @@ -2207,13 +2230,14 @@ buf_LRU_block_remove_hashed( to the compressed page, which will be preserved. 
*/ memcpy(bpage->zip.data, page, - zip_size); + bpage->size.physical()); } break; case FIL_PAGE_TYPE_ZBLOB: case FIL_PAGE_TYPE_ZBLOB2: break; case FIL_PAGE_INDEX: + case FIL_PAGE_RTREE: #ifdef UNIV_ZIP_DEBUG ut_a(page_zip_validate( &bpage->zip, page, @@ -2221,14 +2245,16 @@ buf_LRU_block_remove_hashed( #endif /* UNIV_ZIP_DEBUG */ break; default: - ut_print_timestamp(stderr); - fputs(" InnoDB: ERROR: The compressed page" - " to be evicted seems corrupt:", stderr); - ut_print_buf(stderr, page, zip_size); - fputs("\nInnoDB: Possibly older version" - " of the page:", stderr); + ib::error() << "The compressed page to be" + " evicted seems corrupt:"; + ut_print_buf(stderr, page, + bpage->size.logical()); + + ib::error() << "Possibly older version of" + " the page:"; + ut_print_buf(stderr, bpage->zip.data, - zip_size); + bpage->size.physical()); putc('\n', stderr); ut_error; } @@ -2238,8 +2264,10 @@ buf_LRU_block_remove_hashed( /* fall through */ case BUF_BLOCK_ZIP_PAGE: ut_a(bpage->oldest_modification == 0); - UNIV_MEM_ASSERT_W(bpage->zip.data, - page_zip_get_size(&bpage->zip)); + if (bpage->size.is_compressed()) { + UNIV_MEM_ASSERT_W(bpage->zip.data, + bpage->size.physical()); + } break; case BUF_BLOCK_POOL_WATCH: case BUF_BLOCK_ZIP_DIRTY: @@ -2251,15 +2279,12 @@ buf_LRU_block_remove_hashed( break; } - hashed_bpage = buf_page_hash_get_low(buf_pool, bpage->space, - bpage->offset, fold); + hashed_bpage = buf_page_hash_get_low(buf_pool, bpage->id); + + if (bpage != hashed_bpage) { + ib::error() << "Page " << bpage->id + << " not found in the hash table"; - if (UNIV_UNLIKELY(bpage != hashed_bpage)) { - fprintf(stderr, - "InnoDB: Error: page %lu %lu not found" - " in the hash table\n", - (ulong) bpage->space, - (ulong) bpage->offset); #ifdef UNIV_DEBUG fprintf(stderr, "InnoDB: in_page_hash %lu in_zip_hash %lu\n" @@ -2268,24 +2293,21 @@ buf_LRU_block_remove_hashed( bpage->in_page_hash, bpage->in_zip_hash, bpage->in_free_list, bpage->in_flush_list, bpage->in_LRU_list, bpage->zip.data, - buf_page_get_zip_size(bpage), + bpage->size.logical(), buf_page_get_state(bpage)); #else fprintf(stderr, "InnoDB: zip.data %p zip_size %lu page_state %d\n", bpage->zip.data, - buf_page_get_zip_size(bpage), + bpage->size.logical(), buf_page_get_state(bpage)); #endif if (hashed_bpage) { - fprintf(stderr, - "InnoDB: In hash table we find block" - " %p of %lu %lu which is not %p\n", - (const void*) hashed_bpage, - (ulong) hashed_bpage->space, - (ulong) hashed_bpage->offset, - (const void*) bpage); + + ib::error() << "In hash table we find block " + << hashed_bpage << " of " << hashed_bpage->id + << " which is not " << bpage; } #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG @@ -2303,26 +2325,28 @@ buf_LRU_block_remove_hashed( ut_ad(!bpage->in_zip_hash); ut_ad(bpage->in_page_hash); ut_d(bpage->in_page_hash = FALSE); - HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage); + + HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, bpage->id.fold(), + bpage); + switch (buf_page_get_state(bpage)) { case BUF_BLOCK_ZIP_PAGE: ut_ad(!bpage->in_free_list); ut_ad(!bpage->in_flush_list); ut_ad(!bpage->in_LRU_list); ut_a(bpage->zip.data); - ut_a(buf_page_get_zip_size(bpage)); + ut_a(bpage->size.is_compressed()); #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage); + UT_LIST_REMOVE(buf_pool->zip_clean, bpage); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ mutex_exit(&buf_pool->zip_mutex); rw_lock_x_unlock(hash_lock); buf_pool_mutex_exit_forbid(buf_pool); - buf_buddy_free( - buf_pool, 
bpage->zip.data, - page_zip_get_size(&bpage->zip)); + buf_buddy_free(buf_pool, bpage->zip.data, + bpage->size.physical()); buf_pool_mutex_exit_allow(buf_pool); buf_page_free_descriptor(bpage); @@ -2337,11 +2361,6 @@ buf_LRU_block_remove_hashed( UNIV_PAGE_SIZE); buf_page_set_state(bpage, BUF_BLOCK_REMOVE_HASH); - if (buf_pool->flush_rbt == NULL) { - bpage->space = ULINT32_UNDEFINED; - bpage->offset = ULINT32_UNDEFINED; - } - /* Question: If we release bpage and hash mutex here then what protects us against: 1) Some other thread buffer fixing this page @@ -2374,12 +2393,16 @@ buf_LRU_block_remove_hashed( ut_ad(!bpage->in_LRU_list); buf_pool_mutex_exit_forbid(buf_pool); - buf_buddy_free( - buf_pool, data, - page_zip_get_size(&bpage->zip)); + buf_buddy_free(buf_pool, data, bpage->size.physical()); buf_pool_mutex_exit_allow(buf_pool); + page_zip_set_size(&bpage->zip, 0); + + bpage->size.copy_from( + page_size_t(bpage->size.logical(), + bpage->size.logical(), + false)); } return(true); @@ -2406,21 +2429,23 @@ buf_LRU_block_free_hashed_page( buf_block_t* block) /*!< in: block, must contain a file page and be in a state where it can be freed */ { -#ifdef UNIV_DEBUG buf_pool_t* buf_pool = buf_pool_from_block(block); ut_ad(buf_pool_mutex_own(buf_pool)); -#endif - mutex_enter(&block->mutex); + buf_page_mutex_enter(block); + + if (buf_pool->flush_rbt == NULL) { + block->page.id.reset(ULINT32_UNDEFINED, ULINT32_UNDEFINED); + } + buf_block_set_state(block, BUF_BLOCK_MEMORY); buf_LRU_block_free_non_file_page(block); - mutex_exit(&block->mutex); + buf_page_mutex_exit(block); } /******************************************************************//** Remove one page from LRU list and put it to free list */ -UNIV_INTERN void buf_LRU_free_one_page( /*==================*/ @@ -2429,10 +2454,9 @@ buf_LRU_free_one_page( may or may not be a hash index to the page */ { buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - const ulint fold = buf_page_address_fold(bpage->space, - bpage->offset); - rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold); - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); + + rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, bpage->id); + BPageMutex* block_mutex = buf_page_get_mutex(bpage); ut_ad(buf_pool_mutex_own(buf_pool)); @@ -2444,16 +2468,15 @@ buf_LRU_free_one_page( } /* buf_LRU_block_remove_hashed() releases hash_lock and block_mutex */ -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX) - && !rw_lock_own(hash_lock, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ + ut_ad(!rw_lock_own(hash_lock, RW_LOCK_X) + && !rw_lock_own(hash_lock, RW_LOCK_S)); + ut_ad(!mutex_own(block_mutex)); } /**********************************************************************//** Updates buf_pool->LRU_old_ratio for one buffer pool instance. -@return updated old_pct */ +@return updated old_pct */ static uint buf_LRU_old_ratio_update_instance( @@ -2481,7 +2504,7 @@ buf_LRU_old_ratio_update_instance( buf_pool->LRU_old_ratio = ratio; if (UT_LIST_GET_LEN(buf_pool->LRU) - >= BUF_LRU_OLD_MIN_LEN) { + >= BUF_LRU_OLD_MIN_LEN) { buf_LRU_old_adjust_len(buf_pool); } @@ -2498,9 +2521,8 @@ buf_LRU_old_ratio_update_instance( /**********************************************************************//** Updates buf_pool->LRU_old_ratio. 
-@return updated old_pct */ -UNIV_INTERN -ulint +@return updated old_pct */ +uint buf_LRU_old_ratio_update( /*=====================*/ uint old_pct,/*!< in: Reserve this percentage of @@ -2509,10 +2531,9 @@ buf_LRU_old_ratio_update( FALSE=just assign buf_pool->LRU_old_ratio during the initialization of InnoDB */ { - ulint i; - ulint new_ratio = 0; + uint new_ratio = 0; - for (i = 0; i < srv_buf_pool_instances; i++) { + for (ulint i = 0; i < srv_buf_pool_instances; i++) { buf_pool_t* buf_pool; buf_pool = buf_pool_from_array(i); @@ -2527,24 +2548,22 @@ buf_LRU_old_ratio_update( /********************************************************************//** Update the historical stats that we are collecting for LRU eviction policy at the end of each interval. */ -UNIV_INTERN void buf_LRU_stat_update(void) /*=====================*/ { - ulint i; buf_LRU_stat_t* item; buf_pool_t* buf_pool; - ibool evict_started = FALSE; + bool evict_started = FALSE; buf_LRU_stat_t cur_stat; /* If we haven't started eviction yet then don't update stats. */ - for (i = 0; i < srv_buf_pool_instances; i++) { + for (ulint i = 0; i < srv_buf_pool_instances; i++) { buf_pool = buf_pool_from_array(i); if (buf_pool->freed_page_clock != 0) { - evict_started = TRUE; + evict_started = true; break; } } @@ -2586,33 +2605,32 @@ buf_LRU_validate_instance( /*======================*/ buf_pool_t* buf_pool) { - buf_page_t* bpage; - buf_block_t* block; ulint old_len; ulint new_len; - ut_ad(buf_pool); buf_pool_mutex_enter(buf_pool); if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) { ut_a(buf_pool->LRU_old); old_len = buf_pool->LRU_old_len; + new_len = ut_min(UT_LIST_GET_LEN(buf_pool->LRU) * buf_pool->LRU_old_ratio / BUF_LRU_OLD_RATIO_DIV, UT_LIST_GET_LEN(buf_pool->LRU) - (BUF_LRU_OLD_TOLERANCE + BUF_LRU_NON_OLD_MIN_LEN)); + ut_a(old_len >= new_len - BUF_LRU_OLD_TOLERANCE); ut_a(old_len <= new_len + BUF_LRU_OLD_TOLERANCE); } - UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU, CheckInLRUList()); + CheckInLRUList::validate(buf_pool); old_len = 0; - for (bpage = UT_LIST_GET_FIRST(buf_pool->LRU); + for (buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool->LRU); bpage != NULL; bpage = UT_LIST_GET_NEXT(LRU, bpage)) { @@ -2650,21 +2668,19 @@ buf_LRU_validate_instance( ut_a(buf_pool->LRU_old_len == old_len); - UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free, CheckInFreeList()); + CheckInFreeList::validate(buf_pool); - for (bpage = UT_LIST_GET_FIRST(buf_pool->free); + for (buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool->free); bpage != NULL; bpage = UT_LIST_GET_NEXT(list, bpage)) { ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED); } - UT_LIST_VALIDATE( - unzip_LRU, buf_block_t, buf_pool->unzip_LRU, - CheckUnzipLRUAndLRUList()); + CheckUnzipLRUAndLRUList::validate(buf_pool); - for (block = UT_LIST_GET_FIRST(buf_pool->unzip_LRU); - block; + for (buf_block_t* block = UT_LIST_GET_FIRST(buf_pool->unzip_LRU); + block != NULL; block = UT_LIST_GET_NEXT(unzip_LRU, block)) { ut_ad(block->in_unzip_LRU_list); @@ -2677,15 +2693,12 @@ buf_LRU_validate_instance( /**********************************************************************//** Validates the LRU list. 
-@return TRUE */ -UNIV_INTERN +@return TRUE */ ibool buf_LRU_validate(void) /*==================*/ { - ulint i; - - for (i = 0; i < srv_buf_pool_instances; i++) { + for (ulint i = 0; i < srv_buf_pool_instances; i++) { buf_pool_t* buf_pool; buf_pool = buf_pool_from_array(i); @@ -2699,25 +2712,21 @@ buf_LRU_validate(void) #if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG /**********************************************************************//** Prints the LRU list for one buffer pool instance. */ -UNIV_INTERN void buf_LRU_print_instance( /*===================*/ buf_pool_t* buf_pool) { - const buf_page_t* bpage; - - ut_ad(buf_pool); buf_pool_mutex_enter(buf_pool); - bpage = UT_LIST_GET_FIRST(buf_pool->LRU); - - while (bpage != NULL) { + for (const buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool->LRU); + bpage != NULL; + bpage = UT_LIST_GET_NEXT(LRU, bpage)) { mutex_enter(buf_page_get_mutex(bpage)); - fprintf(stderr, "BLOCK space %lu page %lu ", - (ulong) buf_page_get_space(bpage), - (ulong) buf_page_get_page_no(bpage)); + + fprintf(stderr, "BLOCK space %u page %u ", + bpage->id.space(), bpage->id.page_no()); if (buf_page_is_old(bpage)) { fputs("old ", stderr); @@ -2742,17 +2751,17 @@ buf_LRU_print_instance( case BUF_BLOCK_FILE_PAGE: frame = buf_block_get_frame((buf_block_t*) bpage); fprintf(stderr, "\ntype %lu" - " index id %llu\n", + " index id " UINT32PF "\n", (ulong) fil_page_get_type(frame), - (ullint) btr_page_get_index_id(frame)); + btr_page_get_index_id(frame)); break; case BUF_BLOCK_ZIP_PAGE: frame = bpage->zip.data; fprintf(stderr, "\ntype %lu size %lu" - " index id %llu\n", + " index id " UINT32PF "\n", (ulong) fil_page_get_type(frame), - (ulong) buf_page_get_zip_size(bpage), - (ullint) btr_page_get_index_id(frame)); + (ulong) bpage->size.physical(), + btr_page_get_index_id(frame)); break; default: @@ -2762,7 +2771,6 @@ buf_LRU_print_instance( } mutex_exit(buf_page_get_mutex(bpage)); - bpage = UT_LIST_GET_NEXT(LRU, bpage); } buf_pool_mutex_exit(buf_pool); @@ -2770,15 +2778,13 @@ buf_LRU_print_instance( /**********************************************************************//** Prints the LRU list. */ -UNIV_INTERN void buf_LRU_print(void) /*===============*/ { - ulint i; - buf_pool_t* buf_pool; + for (ulint i = 0; i < srv_buf_pool_instances; i++) { + buf_pool_t* buf_pool; - for (i = 0; i < srv_buf_pool_instances; i++) { buf_pool = buf_pool_from_array(i); buf_LRU_print_instance(buf_pool); } |